/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Implementation of AddressLiteral

// A 2-D table for managing compressed displacement (disp8) on EVEX-enabled platforms.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
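
// Rows are indexed by tuple type (plus its input-size/broadcast variant) and
// columns by vector length (128/256/512 bit). Each entry is the scaling factor
// N used by EVEX disp8*N compression: a displacement that is a multiple of N
// can be encoded as disp/N in a single byte. As an illustrative example: a
// full-vector-memory (EVEX_FVM) access at 512-bit vector length has N = 64,
// so a displacement of 128 can be emitted as the disp8 value 2.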

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
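
// A minimal usage sketch (hypothetical names): wrapping the address of some
// VM-global datum so that code referencing it stays patchable:
//
//   static int SomeVmGlobal;   // hypothetical global
//   AddressLiteral lit((address)&SomeVmGlobal, relocInfo::external_word_type);
//
// The relocation spec recorded above is what later lets the generated code be
// patched or relocated when the enclosing code blob moves.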

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  _xmmindex = xnoreg;
  _isxmmindex = false;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
  _xmmindex = xnoreg;
  _isxmmindex = false;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}
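
// For example (a sketch of the raw-to-Address decoding above): with base = 5
// (rbp), index = 4 (rsp, i.e. "no index"), scale = 0 and disp = 8, make_raw
// takes the no-index branch and yields the operand [rbp + 8]; any other index
// register would instead produce [base + index*scale + disp].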

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}
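
// Note on encode(): registers r8..r15 (encodings 8..15) only keep their low
// three bits in the ModRM/SIB fields; the fourth bit travels in the REX (or
// VEX/EVEX) prefix instead. So, for instance, r9 (encoding 9) contributes 1
// here, with REX.B/REX.R/REX.X supplying the missing high bit.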

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_int32(imm32);
  }
}
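
// A worked byte-level example of the imm8 shortcut above (sketch): for
// addl(rbx, 16), op1 = 0x81 and op2 = 0xC0, and since 16 fits in a signed
// byte the emitted sequence is
//
//   0x83 0xC3 0x10      // add ebx, 0x10  (op1|0x02, op2|encode(rbx), imm8)
//
// whereas an immediate such as 0x12345 would use the full 0x81 /r imm32 form.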

// Force generation of a 4-byte immediate value even if it fits into 8 bits
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}
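
// The memory form takes the same shortcut (sketch): addl(Address(rcx, 0), 16)
// goes through emit_arith_operand and produces
//
//   0x83 0x01 0x10      // add dword ptr [rcx], 0x10
//
// where 0x01 is the ModRM byte [00 000 001]: mod 00, reg field /0 (rax stands
// in for the opcode extension), base rcx.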


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int8(op1);
  emit_int8(op2 | encode(dst) << 3 | encode(src));
}


bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result fits in 8 bits.
  if (VM_Version::supports_evex() && is_evex_inst) {
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if ((-0x80 <= new_disp && new_disp < 0x80)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return (-0x80 <= disp && disp < 0x80);
}


bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result fits in 8 bits.
  if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
    int evex_encoding = _attributes->get_evex_encoding();
    int tuple_type = _attributes->get_tuple_type();
    switch (tuple_type) {
    case EVEX_FV:
      if ((evex_encoding & VEX_W) == VEX_W) {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (_attributes->get_input_size()) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    int vector_len = _attributes->get_vector_len();
    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return is8bit(disp);
}
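
// Worked example of the compression above (sketch): for an EVEX full-vector
// instruction (tuple EVEX_FVM) at 512-bit vector length the table gives
// disp_factor = 64, so a displacement of 192 is divisible, compresses to
// new_disp = 3, fits in a byte, and the instruction gets a 1-byte
// displacement. A displacement of 100 is not a multiple of 64 and forces the
// full 4-byte disp32 form instead.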


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
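
// A worked ModRM/SIB example for the encoder above (sketch): addressing
// through [rax + rbx*4 + 8] with reg = rcx takes the [base + index*scale +
// imm8] branch and emits
//
//   0x4C                // ModRM [01 001 100]: mod 01, reg rcx, rm 100 (SIB)
//   0x98                // SIB   [10 011 000]: scale *4, index rbx, base rax
//   0x08                // disp8
//
// which is the operand part of e.g. "mov ecx, [rax + rbx*4 + 8]".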

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  if (UseAVX > 2) {
    int xreg_enc = reg->encoding();
    if (xreg_enc > 15) {
      XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
      emit_operand((Register)new_reg, base, index, scale, disp, rspec);
      return;
    }
  }
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}
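
// Note for the two XMM overloads here: with AVX-512 there are 32 XMM
// registers, but the ModRM/SIB machinery only sees 4-bit encodings. Masking
// with 0xf should be safe because the fifth register bit is carried by the
// EVEX prefix (EVEX.R'/EVEX.V'), which has already been emitted by the time
// the operand bytes are generated.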

void Assembler::emit_operand(XMMRegister reg, Register base, XMMRegister index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  if (UseAVX > 2) {
    int xreg_enc = reg->encoding();
    int xmmindex_enc = index->encoding();
    XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
    XMMRegister new_index = as_XMMRegister(xmmindex_enc & 0xf);
    emit_operand((Register)new_reg, base, (Register)new_index, scale, disp, rspec);
  } else {
    emit_operand((Register)reg, base, (Register)index, scale, disp, rspec);
  }
}


// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
      // fall through
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
      // fall through
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x58: // addpd
    case 0x59: // mulpd
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
    case 0xFE: // paddd
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
      // fall through
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions,
    // but those carry the prefix 0x0F and are handled when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    int vex_opcode;
    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      vex_opcode = VEX_OPCODE_MASK & *ip;
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    } else {
      vex_opcode = VEX_OPCODE_0F;
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (vex_opcode) {
      case VEX_OPCODE_0F:
        switch (0xFF & *ip) {
        case 0x70: // pshufd r, r/a, #8
        case 0x71: // ps[rl|ra|ll]w r, #8
        case 0x72: // ps[rl|ra|ll]d r, #8
        case 0x73: // ps[rl|ra|ll]q r, #8
        case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
        case 0xC4: // pinsrw r, r, r/a, #8
        case 0xC5: // pextrw r/a, r, #8
        case 0xC6: // shufp[s|d] r, r, r/a, #8
          tail_size = 1;  // the imm8
          break;
        }
        break;
      case VEX_OPCODE_0F_3A:
        tail_size = 1;
        break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert(VM_Version::supports_evex(), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions, all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x22: // pinsrd r, r/a, #8
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // fall through
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
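
// A short decode walk-through (sketch): given the three bytes 0x8B 0x43 0x10
// ("mov eax, [rbx+16]"), locate_operand takes the 0x8B case (has_disp32),
// then parses op2 = 0x43: mod = 01, rm = rbx (no SIB byte), so it skips one
// disp8 byte. With which == end_pc_operand it returns inst + 3, which is what
// locate_next_instruction reports as the start of the following instruction.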


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  if (adr.isxmmindex()) {
    emit_operand(reg, adr._base, adr._xmmindex, adr._scale, adr._disp, adr._rspec);
  } else {
    emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
                 adr._rspec);
  }
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}
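
// For instance (sketch): emit_farith(0xD8, 0xC0, 2) produces the two bytes
// 0xD8 0xC2, i.e. "fadd st, st(2)" -- the x87 stack slot index is simply
// added into the second opcode byte.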


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}

void Assembler::addw(Address dst, int imm16) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, dst, 2);
  emit_int16(imm16);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}
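
// These addr_nop_* forms are the recommended multi-byte NOPs (0F 1F /0) that
// use a do-nothing address computation to pad to an exact length; for
// example, addr_nop_5 above emits 0F 1F 44 00 00. They let code align loops
// and branch targets without disturbing architectural state.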

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
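
// Note how addsd's memory form ties back to the disp8*N machinery above: it
// declares tuple EVEX_T1S with a 64-bit input, so under EVEX the scaling
// factor is 8 and, for example, a displacement of 24 can be emitted as the
// single byte 3 (sketch; the exact encoding depends on the CPU features in
// use).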
1297 
1298 void Assembler::addss(XMMRegister dst, XMMRegister src) {
1299   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1300   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1301   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1302   emit_int8(0x58);
1303   emit_int8((unsigned char)(0xC0 | encode));
1304 }
1305 
1306 void Assembler::addss(XMMRegister dst, Address src) {
1307   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1308   InstructionMark im(this);
1309   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1310   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1311   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1312   emit_int8(0x58);
1313   emit_operand(dst, src);
1314 }
1315 
1316 void Assembler::aesdec(XMMRegister dst, Address src) {
1317   assert(VM_Version::supports_aes(), "");
1318   InstructionMark im(this);
1319   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1320   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1321   emit_int8((unsigned char)0xDE);
1322   emit_operand(dst, src);
1323 }
1324 
1325 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1326   assert(VM_Version::supports_aes(), "");
1327   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1328   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1329   emit_int8((unsigned char)0xDE);
1330   emit_int8(0xC0 | encode);
1331 }
1332 
1333 void Assembler::aesdeclast(XMMRegister dst, Address src) {
1334   assert(VM_Version::supports_aes(), "");
1335   InstructionMark im(this);
1336   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1337   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1338   emit_int8((unsigned char)0xDF);
1339   emit_operand(dst, src);
1340 }
1341 
1342 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1343   assert(VM_Version::supports_aes(), "");
1344   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1345   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1346   emit_int8((unsigned char)0xDF);
1347   emit_int8((unsigned char)(0xC0 | encode));
1348 }
1349 
1350 void Assembler::aesenc(XMMRegister dst, Address src) {
1351   assert(VM_Version::supports_aes(), "");
1352   InstructionMark im(this);
1353   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1354   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1355   emit_int8((unsigned char)0xDC);
1356   emit_operand(dst, src);
1357 }
1358 
1359 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1360   assert(VM_Version::supports_aes(), "");
1361   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1362   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1363   emit_int8((unsigned char)0xDC);
1364   emit_int8(0xC0 | encode);
1365 }
1366 
1367 void Assembler::aesenclast(XMMRegister dst, Address src) {
1368   assert(VM_Version::supports_aes(), "");
1369   InstructionMark im(this);
1370   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1371   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1372   emit_int8((unsigned char)0xDD);
1373   emit_operand(dst, src);
1374 }
1375 
1376 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1377   assert(VM_Version::supports_aes(), "");
1378   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1379   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1380   emit_int8((unsigned char)0xDD);
1381   emit_int8((unsigned char)(0xC0 | encode));
1382 }
1383 
1384 void Assembler::andl(Address dst, int32_t imm32) {
1385   InstructionMark im(this);
1386   prefix(dst);
1387   emit_int8((unsigned char)0x81);
1388   emit_operand(rsp, dst, 4);
1389   emit_int32(imm32);
1390 }
1391 
1392 void Assembler::andl(Register dst, int32_t imm32) {
1393   prefix(dst);
1394   emit_arith(0x81, 0xE0, dst, imm32);
1395 }
1396 
1397 void Assembler::andl(Register dst, Address src) {
1398   InstructionMark im(this);
1399   prefix(src, dst);
1400   emit_int8(0x23);
1401   emit_operand(dst, src);
1402 }
1403 
1404 void Assembler::andl(Register dst, Register src) {
1405   (void) prefix_and_encode(dst->encoding(), src->encoding());
1406   emit_arith(0x23, 0xC0, dst, src);
1407 }
1408 
1409 void Assembler::andnl(Register dst, Register src1, Register src2) {
1410   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1411   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1412   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1413   emit_int8((unsigned char)0xF2);
1414   emit_int8((unsigned char)(0xC0 | encode));
1415 }
1416 
1417 void Assembler::andnl(Register dst, Register src1, Address src2) {
1418   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1419   InstructionMark im(this);
1420   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1421   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1422   emit_int8((unsigned char)0xF2);
1423   emit_operand(dst, src2);
1424 }
1425 
1426 void Assembler::bsfl(Register dst, Register src) {
1427   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1428   emit_int8(0x0F);
1429   emit_int8((unsigned char)0xBC);
1430   emit_int8((unsigned char)(0xC0 | encode));
1431 }
1432 
1433 void Assembler::bsrl(Register dst, Register src) {
1434   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1435   emit_int8(0x0F);
1436   emit_int8((unsigned char)0xBD);
1437   emit_int8((unsigned char)(0xC0 | encode));
1438 }
1439 
1440 void Assembler::bswapl(Register reg) { // bswap
1441   int encode = prefix_and_encode(reg->encoding());
1442   emit_int8(0x0F);
1443   emit_int8((unsigned char)(0xC8 | encode));
1444 }
1445 
1446 void Assembler::blsil(Register dst, Register src) {
1447   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1448   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1449   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1450   emit_int8((unsigned char)0xF3);
1451   emit_int8((unsigned char)(0xC0 | encode));
1452 }
1453 
1454 void Assembler::blsil(Register dst, Address src) {
1455   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1456   InstructionMark im(this);
1457   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1458   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1459   emit_int8((unsigned char)0xF3);
1460   emit_operand(rbx, src);
1461 }
1462 
1463 void Assembler::blsmskl(Register dst, Register src) {
1464   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1465   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1466   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1467   emit_int8((unsigned char)0xF3);
1468   emit_int8((unsigned char)(0xC0 | encode));
1469 }
1470 
1471 void Assembler::blsmskl(Register dst, Address src) {
1472   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1473   InstructionMark im(this);
1474   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1475   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1476   emit_int8((unsigned char)0xF3);
1477   emit_operand(rdx, src);
1478 }
1479 
1480 void Assembler::blsrl(Register dst, Register src) {
1481   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1482   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1483   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1484   emit_int8((unsigned char)0xF3);
1485   emit_int8((unsigned char)(0xC0 | encode));
1486 }
1487 
1488 void Assembler::blsrl(Register dst, Address src) {
1489   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1490   InstructionMark im(this);
1491   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1492   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1493   emit_int8((unsigned char)0xF3);
1494   emit_operand(rcx, src);
1495 }
1496 
1497 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1498   // suspect disp32 is always good
1499   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1500 
1501   if (L.is_bound()) {
1502     const int long_size = 5;
1503     int offs = (int)( target(L) - pc() );
1504     assert(offs <= 0, "assembler error");
1505     InstructionMark im(this);
1506     // 1110 1000 #32-bit disp
1507     emit_int8((unsigned char)0xE8);
1508     emit_data(offs - long_size, rtype, operand);
1509   } else {
1510     InstructionMark im(this);
1511     // 1110 1000 #32-bit disp
1512     L.add_patch_at(code(), locator());
1513 
1514     emit_int8((unsigned char)0xE8);
1515     emit_data(int(0), rtype, operand);
1516   }
1517 }
1518 
1519 void Assembler::call(Register dst) {
1520   int encode = prefix_and_encode(dst->encoding());
1521   emit_int8((unsigned char)0xFF);
1522   emit_int8((unsigned char)(0xD0 | encode));
1523 }
1524 
1525 
1526 void Assembler::call(Address adr) {
1527   InstructionMark im(this);
1528   prefix(adr);
1529   emit_int8((unsigned char)0xFF);
1530   emit_operand(rdx, adr);
1531 }
1532 
1533 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1534   InstructionMark im(this);
1535   emit_int8((unsigned char)0xE8);
1536   intptr_t disp = entry - (pc() + sizeof(int32_t));
1537   // Entry is NULL in case of a scratch emit.
1538   assert(entry == NULL || is_simm32(disp), "disp=" INTPTR_FORMAT " must be 32bit offset (call2)", disp);
  // Technically, we should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.
1541 
1542   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1543   emit_data((int) disp, rspec, operand);
1544 }
1545 
1546 void Assembler::cdql() {
1547   emit_int8((unsigned char)0x99);
1548 }
1549 
1550 void Assembler::cld() {
1551   emit_int8((unsigned char)0xFC);
1552 }
1553 
1554 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1555   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1556   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1557   emit_int8(0x0F);
1558   emit_int8(0x40 | cc);
1559   emit_int8((unsigned char)(0xC0 | encode));
1560 }
1561 
1562 
1563 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1564   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1565   prefix(src, dst);
1566   emit_int8(0x0F);
1567   emit_int8(0x40 | cc);
1568   emit_operand(dst, src);
1569 }
1570 
1571 void Assembler::cmpb(Address dst, int imm8) {
1572   InstructionMark im(this);
1573   prefix(dst);
1574   emit_int8((unsigned char)0x80);
1575   emit_operand(rdi, dst, 1);
1576   emit_int8(imm8);
1577 }
1578 
1579 void Assembler::cmpl(Address dst, int32_t imm32) {
1580   InstructionMark im(this);
1581   prefix(dst);
1582   emit_int8((unsigned char)0x81);
1583   emit_operand(rdi, dst, 4);
1584   emit_int32(imm32);
1585 }
1586 
1587 void Assembler::cmpl(Register dst, int32_t imm32) {
1588   prefix(dst);
1589   emit_arith(0x81, 0xF8, dst, imm32);
1590 }
1591 
1592 void Assembler::cmpl(Register dst, Register src) {
1593   (void) prefix_and_encode(dst->encoding(), src->encoding());
1594   emit_arith(0x3B, 0xC0, dst, src);
1595 }
1596 
1597 void Assembler::cmpl(Register dst, Address  src) {
1598   InstructionMark im(this);
1599   prefix(src, dst);
1600   emit_int8((unsigned char)0x3B);
1601   emit_operand(dst, src);
1602 }
1603 
1604 void Assembler::cmpw(Address dst, int imm16) {
1605   InstructionMark im(this);
1606   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1607   emit_int8(0x66);
1608   emit_int8((unsigned char)0x81);
1609   emit_operand(rdi, dst, 2);
1610   emit_int16(imm16);
1611 }
1612 
// The 32-bit cmpxchg compares the value at adr with the contents of rax;
// if they are equal, reg is stored into adr. Otherwise, the value at adr
// is loaded into rax. ZF is set if the compared values were equal, and
// cleared otherwise.
1616 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1617   InstructionMark im(this);
1618   prefix(adr, reg);
1619   emit_int8(0x0F);
1620   emit_int8((unsigned char)0xB1);
1621   emit_operand(reg, adr);
1622 }
1623 
// The 8-bit cmpxchg compares the value at adr with the contents of rax;
// if they are equal, reg is stored into adr. Otherwise, the value at adr
// is loaded into rax. ZF is set if the compared values were equal, and
// cleared otherwise.
1627 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1628   InstructionMark im(this);
1629   prefix(adr, reg, true);
1630   emit_int8(0x0F);
1631   emit_int8((unsigned char)0xB0);
1632   emit_operand(reg, adr);
1633 }
1634 
1635 void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 prefix is there. Strangely, ucomisd comes out correct.
1638   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1639   InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1641   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1642   attributes.set_rex_vex_w_reverted();
1643   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1644   emit_int8(0x2F);
1645   emit_operand(dst, src);
1646 }
1647 
1648 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1649   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1650   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1651   attributes.set_rex_vex_w_reverted();
1652   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1653   emit_int8(0x2F);
1654   emit_int8((unsigned char)(0xC0 | encode));
1655 }
1656 
1657 void Assembler::comiss(XMMRegister dst, Address src) {
1658   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1659   InstructionMark im(this);
1660   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1661   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1662   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1663   emit_int8(0x2F);
1664   emit_operand(dst, src);
1665 }
1666 
1667 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1668   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1669   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1670   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1671   emit_int8(0x2F);
1672   emit_int8((unsigned char)(0xC0 | encode));
1673 }
1674 
1675 void Assembler::cpuid() {
1676   emit_int8(0x0F);
1677   emit_int8((unsigned char)0xA2);
1678 }
1679 
// Opcode / Instruction                   Op/En  64-Bit Mode  Compat/Leg Mode  Description                  Implemented
// F2 0F 38 F0 /r        CRC32 r32, r/m8    RM   Valid        Valid            Accumulate CRC32 on r/m8.    v
// F2 REX 0F 38 F0 /r    CRC32 r32, r/m8*   RM   Valid        N.E.             Accumulate CRC32 on r/m8.    -
// F2 REX.W 0F 38 F0 /r  CRC32 r64, r/m8    RM   Valid        N.E.             Accumulate CRC32 on r/m8.    -
//
// F2 0F 38 F1 /r        CRC32 r32, r/m16   RM   Valid        Valid            Accumulate CRC32 on r/m16.   v
//
// F2 0F 38 F1 /r        CRC32 r32, r/m32   RM   Valid        Valid            Accumulate CRC32 on r/m32.   v
//
// F2 REX.W 0F 38 F1 /r  CRC32 r64, r/m64   RM   Valid        N.E.             Accumulate CRC32 on r/m64.   v
1690 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1691   assert(VM_Version::supports_sse4_2(), "");
1692   int8_t w = 0x01;
1693   Prefix p = Prefix_EMPTY;
1694 
1695   emit_int8((int8_t)0xF2);
1696   switch (sizeInBytes) {
1697   case 1:
1698     w = 0;
1699     break;
1700   case 2:
1701   case 4:
1702     break;
1703   LP64_ONLY(case 8:)
    // This instruction is not valid in 32-bit mode.
    // Note:
    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
    //
    // Page B-72, Vol. 2C gives the bit pattern
    // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
    // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r/m
    //                                                                           (note the F0 here, not F1!)
    // while page 3-208, Vol. 2A gives
    // F2 REX.W 0F 38 F1 /r       CRC32 r64, r/m64             RM         Valid      N.E.   Accumulate CRC32 on r/m64.
    //
    // A 0 in the last opcode bit is reserved for a different flavor of this instruction:
    // F2 REX.W 0F 38 F0 /r       CRC32 r64, r/m8              RM         Valid      N.E.   Accumulate CRC32 on r/m8.
1717     p = REX_W;
1718     break;
1719   default:
1720     assert(0, "Unsupported value for a sizeInBytes argument");
1721     break;
1722   }
1723   LP64_ONLY(prefix(crc, v, p);)
1724   emit_int8((int8_t)0x0F);
1725   emit_int8(0x38);
1726   emit_int8((int8_t)(0xF0 | w));
1727   emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
1728 }
1729 
1730 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1731   assert(VM_Version::supports_sse4_2(), "");
1732   InstructionMark im(this);
1733   int8_t w = 0x01;
1734   Prefix p = Prefix_EMPTY;
1735 
1736   emit_int8((int8_t)0xF2);
1737   switch (sizeInBytes) {
1738   case 1:
1739     w = 0;
1740     break;
1741   case 2:
1742   case 4:
1743     break;
1744   LP64_ONLY(case 8:)
    // This instruction is not valid in 32-bit mode.
1746     p = REX_W;
1747     break;
1748   default:
1749     assert(0, "Unsupported value for a sizeInBytes argument");
1750     break;
1751   }
1752   LP64_ONLY(prefix(crc, adr, p);)
1753   emit_int8((int8_t)0x0F);
1754   emit_int8(0x38);
1755   emit_int8((int8_t)(0xF0 | w));
1756   emit_operand(crc, adr);
1757 }
1758 
1759 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1760   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1761   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1762   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1763   emit_int8((unsigned char)0xE6);
1764   emit_int8((unsigned char)(0xC0 | encode));
1765 }
1766 
1767 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1768   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1769   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1770   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1771   emit_int8(0x5B);
1772   emit_int8((unsigned char)(0xC0 | encode));
1773 }
1774 
1775 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1776   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1777   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1778   attributes.set_rex_vex_w_reverted();
1779   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1780   emit_int8(0x5A);
1781   emit_int8((unsigned char)(0xC0 | encode));
1782 }
1783 
1784 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1785   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1786   InstructionMark im(this);
1787   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1788   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1789   attributes.set_rex_vex_w_reverted();
1790   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1791   emit_int8(0x5A);
1792   emit_operand(dst, src);
1793 }
1794 
1795 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1796   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1797   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1798   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1799   emit_int8(0x2A);
1800   emit_int8((unsigned char)(0xC0 | encode));
1801 }
1802 
1803 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1804   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1805   InstructionMark im(this);
1806   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1807   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1808   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1809   emit_int8(0x2A);
1810   emit_operand(dst, src);
1811 }
1812 
1813 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1814   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1815   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1816   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1817   emit_int8(0x2A);
1818   emit_int8((unsigned char)(0xC0 | encode));
1819 }
1820 
1821 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1822   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1823   InstructionMark im(this);
1824   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1825   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1826   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1827   emit_int8(0x2A);
1828   emit_operand(dst, src);
1829 }
1830 
1831 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1832   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1833   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1834   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1835   emit_int8(0x2A);
1836   emit_int8((unsigned char)(0xC0 | encode));
1837 }
1838 
1839 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1840   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1841   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1842   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1843   emit_int8(0x5A);
1844   emit_int8((unsigned char)(0xC0 | encode));
1845 }
1846 
1847 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1848   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1849   InstructionMark im(this);
1850   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1851   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1852   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1853   emit_int8(0x5A);
1854   emit_operand(dst, src);
1855 }
1856 
1857 
1858 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1859   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1860   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1861   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1862   emit_int8(0x2C);
1863   emit_int8((unsigned char)(0xC0 | encode));
1864 }
1865 
1866 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1867   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1868   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1869   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1870   emit_int8(0x2C);
1871   emit_int8((unsigned char)(0xC0 | encode));
1872 }
1873 
1874 void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
1875   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1876   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
1877   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1878   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1879   emit_int8((unsigned char)0xE6);
1880   emit_int8((unsigned char)(0xC0 | encode));
1881 }
1882 
1883 void Assembler::decl(Address dst) {
1884   // Don't use it directly. Use MacroAssembler::decrement() instead.
1885   InstructionMark im(this);
1886   prefix(dst);
1887   emit_int8((unsigned char)0xFF);
1888   emit_operand(rcx, dst);
1889 }
1890 
1891 void Assembler::divsd(XMMRegister dst, Address src) {
1892   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1893   InstructionMark im(this);
1894   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1895   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1896   attributes.set_rex_vex_w_reverted();
1897   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1898   emit_int8(0x5E);
1899   emit_operand(dst, src);
1900 }
1901 
1902 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1903   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1904   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1905   attributes.set_rex_vex_w_reverted();
1906   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1907   emit_int8(0x5E);
1908   emit_int8((unsigned char)(0xC0 | encode));
1909 }
1910 
1911 void Assembler::divss(XMMRegister dst, Address src) {
1912   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1913   InstructionMark im(this);
1914   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1915   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1916   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1917   emit_int8(0x5E);
1918   emit_operand(dst, src);
1919 }
1920 
1921 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1922   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1923   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1924   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1925   emit_int8(0x5E);
1926   emit_int8((unsigned char)(0xC0 | encode));
1927 }
1928 
1929 void Assembler::emms() {
1930   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1931   emit_int8(0x0F);
1932   emit_int8(0x77);
1933 }
1934 
1935 void Assembler::hlt() {
1936   emit_int8((unsigned char)0xF4);
1937 }
1938 
1939 void Assembler::idivl(Register src) {
1940   int encode = prefix_and_encode(src->encoding());
1941   emit_int8((unsigned char)0xF7);
1942   emit_int8((unsigned char)(0xF8 | encode));
1943 }
1944 
1945 void Assembler::divl(Register src) { // Unsigned
1946   int encode = prefix_and_encode(src->encoding());
1947   emit_int8((unsigned char)0xF7);
1948   emit_int8((unsigned char)(0xF0 | encode));
1949 }
1950 
1951 void Assembler::imull(Register src) {
1952   int encode = prefix_and_encode(src->encoding());
1953   emit_int8((unsigned char)0xF7);
1954   emit_int8((unsigned char)(0xE8 | encode));
1955 }
1956 
1957 void Assembler::imull(Register dst, Register src) {
1958   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1959   emit_int8(0x0F);
1960   emit_int8((unsigned char)0xAF);
1961   emit_int8((unsigned char)(0xC0 | encode));
1962 }
1963 
1964 
1965 void Assembler::imull(Register dst, Register src, int value) {
1966   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1967   if (is8bit(value)) {
1968     emit_int8(0x6B);
1969     emit_int8((unsigned char)(0xC0 | encode));
1970     emit_int8(value & 0xFF);
1971   } else {
1972     emit_int8(0x69);
1973     emit_int8((unsigned char)(0xC0 | encode));
1974     emit_int32(value);
1975   }
1976 }
1977 
1978 void Assembler::imull(Register dst, Address src) {
1979   InstructionMark im(this);
1980   prefix(src, dst);
1981   emit_int8(0x0F);
1982   emit_int8((unsigned char) 0xAF);
1983   emit_operand(dst, src);
1984 }
1985 
1986 
1987 void Assembler::incl(Address dst) {
1988   // Don't use it directly. Use MacroAssembler::increment() instead.
1989   InstructionMark im(this);
1990   prefix(dst);
1991   emit_int8((unsigned char)0xFF);
1992   emit_operand(rax, dst);
1993 }
1994 
1995 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1996   InstructionMark im(this);
1997   assert((0 <= cc) && (cc < 16), "illegal cc");
1998   if (L.is_bound()) {
1999     address dst = target(L);
2000     assert(dst != NULL, "jcc most probably wrong");
2001 
2002     const int short_size = 2;
2003     const int long_size = 6;
2004     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
2005     if (maybe_short && is8bit(offs - short_size)) {
2006       // 0111 tttn #8-bit disp
2007       emit_int8(0x70 | cc);
2008       emit_int8((offs - short_size) & 0xFF);
2009     } else {
2010       // 0000 1111 1000 tttn #32-bit disp
2011       assert(is_simm32(offs - long_size),
2012              "must be 32bit offset (call4)");
2013       emit_int8(0x0F);
2014       emit_int8((unsigned char)(0x80 | cc));
2015       emit_int32(offs - long_size);
2016     }
2017   } else {
    // Note: we could eliminate conditional jumps to this jump if the
    //       condition is the same; however, that seems to be a rather unlikely case.
    // Note: use jccb() if the label to be bound is very close, to get
    //       an 8-bit displacement.
2022     L.add_patch_at(code(), locator());
2023     emit_int8(0x0F);
2024     emit_int8((unsigned char)(0x80 | cc));
2025     emit_int32(0);
2026   }
2027 }
2028 
2029 void Assembler::jccb(Condition cc, Label& L) {
2030   if (L.is_bound()) {
2031     const int short_size = 2;
2032     address entry = target(L);
2033 #ifdef ASSERT
2034     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2035     intptr_t delta = short_branch_delta();
2036     if (delta != 0) {
2037       dist += (dist < 0 ? (-delta) :delta);
2038     }
    assert(is8bit(dist), "Displacement too large for a short jmp");
2040 #endif
2041     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2042     // 0111 tttn #8-bit disp
2043     emit_int8(0x70 | cc);
2044     emit_int8((offs - short_size) & 0xFF);
2045   } else {
2046     InstructionMark im(this);
2047     L.add_patch_at(code(), locator());
2048     emit_int8(0x70 | cc);
2049     emit_int8(0);
2050   }
2051 }
2052 
2053 void Assembler::jmp(Address adr) {
2054   InstructionMark im(this);
2055   prefix(adr);
2056   emit_int8((unsigned char)0xFF);
2057   emit_operand(rsp, adr);
2058 }
2059 
2060 void Assembler::jmp(Label& L, bool maybe_short) {
2061   if (L.is_bound()) {
2062     address entry = target(L);
2063     assert(entry != NULL, "jmp most probably wrong");
2064     InstructionMark im(this);
2065     const int short_size = 2;
2066     const int long_size = 5;
2067     intptr_t offs = entry - pc();
2068     if (maybe_short && is8bit(offs - short_size)) {
2069       emit_int8((unsigned char)0xEB);
2070       emit_int8((offs - short_size) & 0xFF);
2071     } else {
2072       emit_int8((unsigned char)0xE9);
2073       emit_int32(offs - long_size);
2074     }
2075   } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not reach beyond 127 bytes, use jmpb to
    // force an 8-bit displacement.
2080     InstructionMark im(this);
2081     L.add_patch_at(code(), locator());
2082     emit_int8((unsigned char)0xE9);
2083     emit_int32(0);
2084   }
2085 }
2086 
2087 void Assembler::jmp(Register entry) {
2088   int encode = prefix_and_encode(entry->encoding());
2089   emit_int8((unsigned char)0xFF);
2090   emit_int8((unsigned char)(0xE0 | encode));
2091 }
2092 
2093 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2094   InstructionMark im(this);
2095   emit_int8((unsigned char)0xE9);
2096   assert(dest != NULL, "must have a target");
2097   intptr_t disp = dest - (pc() + sizeof(int32_t));
2098   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2099   emit_data(disp, rspec.reloc(), call32_operand);
2100 }
2101 
2102 void Assembler::jmpb(Label& L) {
2103   if (L.is_bound()) {
2104     const int short_size = 2;
2105     address entry = target(L);
2106     assert(entry != NULL, "jmp most probably wrong");
2107 #ifdef ASSERT
2108     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2109     intptr_t delta = short_branch_delta();
2110     if (delta != 0) {
2111       dist += (dist < 0 ? (-delta) :delta);
2112     }
    assert(is8bit(dist), "Displacement too large for a short jmp");
2114 #endif
2115     intptr_t offs = entry - pc();
2116     emit_int8((unsigned char)0xEB);
2117     emit_int8((offs - short_size) & 0xFF);
2118   } else {
2119     InstructionMark im(this);
2120     L.add_patch_at(code(), locator());
2121     emit_int8((unsigned char)0xEB);
2122     emit_int8(0);
2123   }
2124 }
2125 
void Assembler::ldmxcsr(Address src) {
  if (UseAVX > 0) {
2128     InstructionMark im(this);
2129     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2130     vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2131     emit_int8((unsigned char)0xAE);
2132     emit_operand(as_Register(2), src);
2133   } else {
2134     NOT_LP64(assert(VM_Version::supports_sse(), ""));
2135     InstructionMark im(this);
2136     prefix(src);
2137     emit_int8(0x0F);
2138     emit_int8((unsigned char)0xAE);
2139     emit_operand(as_Register(2), src);
2140   }
2141 }
2142 
2143 void Assembler::leal(Register dst, Address src) {
2144   InstructionMark im(this);
2145 #ifdef _LP64
2146   emit_int8(0x67); // addr32
2147   prefix(src, dst);
#endif // _LP64
2149   emit_int8((unsigned char)0x8D);
2150   emit_operand(dst, src);
2151 }
2152 
2153 void Assembler::lfence() {
2154   emit_int8(0x0F);
2155   emit_int8((unsigned char)0xAE);
2156   emit_int8((unsigned char)0xE8);
2157 }
2158 
2159 void Assembler::lock() {
2160   emit_int8((unsigned char)0xF0);
2161 }
2162 
2163 void Assembler::lzcntl(Register dst, Register src) {
2164   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2165   emit_int8((unsigned char)0xF3);
2166   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2167   emit_int8(0x0F);
2168   emit_int8((unsigned char)0xBD);
2169   emit_int8((unsigned char)(0xC0 | encode));
2170 }
2171 
2172 // Emit mfence instruction
2173 void Assembler::mfence() {
2174   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2175   emit_int8(0x0F);
2176   emit_int8((unsigned char)0xAE);
2177   emit_int8((unsigned char)0xF0);
2178 }
2179 
2180 void Assembler::mov(Register dst, Register src) {
2181   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2182 }
2183 
2184 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2185   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2186   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2187   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2188   attributes.set_rex_vex_w_reverted();
2189   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2190   emit_int8(0x28);
2191   emit_int8((unsigned char)(0xC0 | encode));
2192 }
2193 
2194 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2195   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2196   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2197   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2198   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2199   emit_int8(0x28);
2200   emit_int8((unsigned char)(0xC0 | encode));
2201 }
2202 
2203 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2204   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2205   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2206   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2207   emit_int8(0x16);
2208   emit_int8((unsigned char)(0xC0 | encode));
2209 }
2210 
2211 void Assembler::movb(Register dst, Address src) {
2212   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2213   InstructionMark im(this);
2214   prefix(src, dst, true);
2215   emit_int8((unsigned char)0x8A);
2216   emit_operand(dst, src);
2217 }
2218 
2219 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2220   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2221   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2222   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2223   attributes.set_rex_vex_w_reverted();
2224   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2225   emit_int8(0x12);
  emit_int8((unsigned char)(0xC0 | encode));
2227 }
2228 
2229 void Assembler::kmovbl(KRegister dst, Register src) {
2230   assert(VM_Version::supports_avx512dq(), "");
2231   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2232   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2233   emit_int8((unsigned char)0x92);
2234   emit_int8((unsigned char)(0xC0 | encode));
2235 }
2236 
2237 void Assembler::kmovbl(Register dst, KRegister src) {
2238   assert(VM_Version::supports_avx512dq(), "");
2239   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2240   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2241   emit_int8((unsigned char)0x93);
2242   emit_int8((unsigned char)(0xC0 | encode));
2243 }
2244 
2245 void Assembler::kmovwl(KRegister dst, Register src) {
2246   assert(VM_Version::supports_evex(), "");
2247   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2248   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2249   emit_int8((unsigned char)0x92);
2250   emit_int8((unsigned char)(0xC0 | encode));
2251 }
2252 
2253 void Assembler::kmovwl(Register dst, KRegister src) {
2254   assert(VM_Version::supports_evex(), "");
2255   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2256   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2257   emit_int8((unsigned char)0x93);
2258   emit_int8((unsigned char)(0xC0 | encode));
2259 }
2260 
2261 void Assembler::kmovwl(KRegister dst, Address src) {
2262   assert(VM_Version::supports_evex(), "");
2263   InstructionMark im(this);
2264   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2265   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2266   emit_int8((unsigned char)0x90);
2267   emit_operand((Register)dst, src);
2268 }
2269 
2270 void Assembler::kmovdl(KRegister dst, Register src) {
2271   assert(VM_Version::supports_avx512bw(), "");
2272   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2273   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2274   emit_int8((unsigned char)0x92);
2275   emit_int8((unsigned char)(0xC0 | encode));
2276 }
2277 
2278 void Assembler::kmovdl(Register dst, KRegister src) {
2279   assert(VM_Version::supports_avx512bw(), "");
2280   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2281   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2282   emit_int8((unsigned char)0x93);
2283   emit_int8((unsigned char)(0xC0 | encode));
2284 }
2285 
2286 void Assembler::kmovql(KRegister dst, KRegister src) {
2287   assert(VM_Version::supports_avx512bw(), "");
2288   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2289   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2290   emit_int8((unsigned char)0x90);
2291   emit_int8((unsigned char)(0xC0 | encode));
2292 }
2293 
2294 void Assembler::kmovql(KRegister dst, Address src) {
2295   assert(VM_Version::supports_avx512bw(), "");
2296   InstructionMark im(this);
2297   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2298   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2299   emit_int8((unsigned char)0x90);
2300   emit_operand((Register)dst, src);
2301 }
2302 
2303 void Assembler::kmovql(Address dst, KRegister src) {
2304   assert(VM_Version::supports_avx512bw(), "");
2305   InstructionMark im(this);
2306   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2307   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2308   emit_int8((unsigned char)0x90);
2309   emit_operand((Register)src, dst);
2310 }
2311 
2312 void Assembler::kmovql(KRegister dst, Register src) {
2313   assert(VM_Version::supports_avx512bw(), "");
2314   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2315   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2316   emit_int8((unsigned char)0x92);
2317   emit_int8((unsigned char)(0xC0 | encode));
2318 }
2319 
2320 void Assembler::kmovql(Register dst, KRegister src) {
2321   assert(VM_Version::supports_avx512bw(), "");
2322   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2323   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2324   emit_int8((unsigned char)0x93);
2325   emit_int8((unsigned char)(0xC0 | encode));
2326 }
2327 
2328 void Assembler::knotwl(KRegister dst, KRegister src) {
2329   assert(VM_Version::supports_evex(), "");
2330   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2331   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2332   emit_int8((unsigned char)0x44);
2333   emit_int8((unsigned char)(0xC0 | encode));
2334 }
2335 
// This instruction sets ZF and CF: ZF if the OR of the two masks is all zeroes, CF if it is all ones.
2337 void Assembler::kortestbl(KRegister src1, KRegister src2) {
2338   assert(VM_Version::supports_avx512dq(), "");
2339   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2340   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2341   emit_int8((unsigned char)0x98);
2342   emit_int8((unsigned char)(0xC0 | encode));
2343 }
2344 
// This instruction sets ZF and CF: ZF if the OR of the two masks is all zeroes, CF if it is all ones.
2346 void Assembler::kortestwl(KRegister src1, KRegister src2) {
2347   assert(VM_Version::supports_evex(), "");
2348   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2349   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2350   emit_int8((unsigned char)0x98);
2351   emit_int8((unsigned char)(0xC0 | encode));
2352 }
2353 
// This instruction sets ZF and CF: ZF if the OR of the two masks is all zeroes, CF if it is all ones.
2355 void Assembler::kortestdl(KRegister src1, KRegister src2) {
2356   assert(VM_Version::supports_avx512bw(), "");
2357   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2358   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2359   emit_int8((unsigned char)0x98);
2360   emit_int8((unsigned char)(0xC0 | encode));
2361 }
2362 
// This instruction sets ZF and CF: ZF if the OR of the two masks is all zeroes, CF if it is all ones.
2364 void Assembler::kortestql(KRegister src1, KRegister src2) {
2365   assert(VM_Version::supports_avx512bw(), "");
2366   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2367   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2368   emit_int8((unsigned char)0x98);
2369   emit_int8((unsigned char)(0xC0 | encode));
2370 }
2371 
// This instruction sets ZF and CF: ZF if (src1 AND src2) is all zeroes, CF if (src1 AND NOT src2) is all zeroes.
2373 void Assembler::ktestql(KRegister src1, KRegister src2) {
2374   assert(VM_Version::supports_avx512bw(), "");
2375   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2376   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2377   emit_int8((unsigned char)0x99);
2378   emit_int8((unsigned char)(0xC0 | encode));
2379 }
2380 
2381 void Assembler::ktestq(KRegister src1, KRegister src2) {
2382   assert(VM_Version::supports_avx512bw(), "");
2383   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2384   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2385   emit_int8((unsigned char)0x99);
2386   emit_int8((unsigned char)(0xC0 | encode));
2387 }
2388 
2389 void Assembler::ktestd(KRegister src1, KRegister src2) {
2390   assert(VM_Version::supports_avx512bw(), "");
2391   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2392   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2393   emit_int8((unsigned char)0x99);
2394   emit_int8((unsigned char)(0xC0 | encode));
2395 }
2396 
2397 void Assembler::movb(Address dst, int imm8) {
2398   InstructionMark im(this);
  prefix(dst);
2400   emit_int8((unsigned char)0xC6);
2401   emit_operand(rax, dst, 1);
2402   emit_int8(imm8);
2403 }
2404 
2405 
2406 void Assembler::movb(Address dst, Register src) {
2407   assert(src->has_byte_register(), "must have byte register");
2408   InstructionMark im(this);
2409   prefix(dst, src, true);
2410   emit_int8((unsigned char)0x88);
2411   emit_operand(src, dst);
2412 }
2413 
2414 void Assembler::movdl(XMMRegister dst, Register src) {
2415   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2416   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2417   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2418   emit_int8(0x6E);
2419   emit_int8((unsigned char)(0xC0 | encode));
2420 }
2421 
2422 void Assembler::movdl(Register dst, XMMRegister src) {
2423   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2424   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2425   // swap src/dst to get correct prefix
2426   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2427   emit_int8(0x7E);
2428   emit_int8((unsigned char)(0xC0 | encode));
2429 }
2430 
2431 void Assembler::movdl(XMMRegister dst, Address src) {
2432   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2433   InstructionMark im(this);
2434   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2435   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2436   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2437   emit_int8(0x6E);
2438   emit_operand(dst, src);
2439 }
2440 
2441 void Assembler::movdl(Address dst, XMMRegister src) {
2442   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2443   InstructionMark im(this);
2444   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2445   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2446   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2447   emit_int8(0x7E);
2448   emit_operand(src, dst);
2449 }
2450 
2451 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2452   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2453   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2454   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2455   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2456   emit_int8(0x6F);
2457   emit_int8((unsigned char)(0xC0 | encode));
2458 }
2459 
2460 void Assembler::movdqa(XMMRegister dst, Address src) {
2461   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2462   InstructionMark im(this);
2463   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2464   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2465   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2466   emit_int8(0x6F);
2467   emit_operand(dst, src);
2468 }
2469 
2470 void Assembler::movdqu(XMMRegister dst, Address src) {
2471   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2472   InstructionMark im(this);
2473   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2474   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2475   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2476   emit_int8(0x6F);
2477   emit_operand(dst, src);
2478 }
2479 
2480 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2481   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2482   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2483   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2484   emit_int8(0x6F);
2485   emit_int8((unsigned char)(0xC0 | encode));
2486 }
2487 
2488 void Assembler::movdqu(Address dst, XMMRegister src) {
2489   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2490   InstructionMark im(this);
2491   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2492   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2493   attributes.reset_is_clear_context();
2494   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2495   emit_int8(0x7F);
2496   emit_operand(src, dst);
2497 }
2498 
// Move Unaligned 256-bit Vector
2500 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2501   assert(UseAVX > 0, "");
2502   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2503   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2504   emit_int8(0x6F);
2505   emit_int8((unsigned char)(0xC0 | encode));
2506 }
2507 
2508 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2509   assert(UseAVX > 0, "");
2510   InstructionMark im(this);
2511   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2512   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2513   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2514   emit_int8(0x6F);
2515   emit_operand(dst, src);
2516 }
2517 
2518 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2519   assert(UseAVX > 0, "");
2520   InstructionMark im(this);
2521   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2522   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2523   attributes.reset_is_clear_context();
2524   // swap src<->dst for encoding
2525   assert(src != xnoreg, "sanity");
2526   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2527   emit_int8(0x7F);
2528   emit_operand(src, dst);
2529 }
2530 
// Move Unaligned EVEX-enabled Vector (programmable element widths: 8, 16, 32, or 64 bits)
2532 void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
2533   assert(VM_Version::supports_evex(), "");
2534   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2535   attributes.set_is_evex_instruction();
2536   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2537   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2538   emit_int8(0x6F);
2539   emit_int8((unsigned char)(0xC0 | encode));
2540 }
2541 
2542 void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
2543   assert(VM_Version::supports_evex(), "");
2544   InstructionMark im(this);
2545   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2546   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2547   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2548   attributes.set_is_evex_instruction();
2549   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2550   emit_int8(0x6F);
2551   emit_operand(dst, src);
2552 }
2553 
2554 void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
2555   assert(VM_Version::supports_evex(), "");
2556   assert(src != xnoreg, "sanity");
2557   InstructionMark im(this);
2558   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2559   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2560   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2561   attributes.set_is_evex_instruction();
2562   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2563   emit_int8(0x7F);
2564   emit_operand(src, dst);
2565 }
2566 
2567 void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2568   assert(VM_Version::supports_avx512vlbw(), "");
2569   assert(is_vector_masking(), "");    // For stub code use only
2570   InstructionMark im(this);
2571   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2572   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2573   attributes.set_embedded_opmask_register_specifier(mask);
2574   attributes.set_is_evex_instruction();
2575   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2576   emit_int8(0x6F);
2577   emit_operand(dst, src);
2578 }
2579 
2580 void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
2581   assert(VM_Version::supports_evex(), "");
2582   InstructionMark im(this);
2583   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2584   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2585   attributes.set_is_evex_instruction();
2586   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2587   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2588   emit_int8(0x6F);
2589   emit_operand(dst, src);
2590 }
2591 
2592 void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2593   assert(is_vector_masking(), "");
2594   assert(VM_Version::supports_avx512vlbw(), "");
2595   InstructionMark im(this);
2596   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2597   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2598   attributes.set_embedded_opmask_register_specifier(mask);
2599   attributes.set_is_evex_instruction();
2600   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2601   emit_int8(0x6F);
2602   emit_operand(dst, src);
2603 }
2604 
2605 void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
2606   assert(VM_Version::supports_evex(), "");
2607   assert(src != xnoreg, "sanity");
2608   InstructionMark im(this);
2609   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2610   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2611   attributes.set_is_evex_instruction();
2612   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2613   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2614   emit_int8(0x7F);
2615   emit_operand(src, dst);
2616 }
2617 
2618 void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) {
2619   assert(VM_Version::supports_avx512vlbw(), "");
2620   assert(src != xnoreg, "sanity");
2621   InstructionMark im(this);
2622   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2623   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2624   attributes.reset_is_clear_context();
2625   attributes.set_embedded_opmask_register_specifier(mask);
2626   attributes.set_is_evex_instruction();
2627   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2628   emit_int8(0x7F);
2629   emit_operand(src, dst);
2630 }
2631 
2632 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2633   assert(VM_Version::supports_evex(), "");
2634   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2635   attributes.set_is_evex_instruction();
2636   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2637   emit_int8(0x6F);
2638   emit_int8((unsigned char)(0xC0 | encode));
2639 }
2640 
2641 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2642   assert(VM_Version::supports_evex(), "");
2643   InstructionMark im(this);
2644   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
2645   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2646   attributes.set_is_evex_instruction();
2647   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2648   emit_int8(0x6F);
2649   emit_operand(dst, src);
2650 }
2651 
2652 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2653   assert(VM_Version::supports_evex(), "");
2654   assert(src != xnoreg, "sanity");
2655   InstructionMark im(this);
2656   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2657   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2658   attributes.reset_is_clear_context();
2659   attributes.set_is_evex_instruction();
2660   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2661   emit_int8(0x7F);
2662   emit_operand(src, dst);
2663 }
2664 
2665 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2666   assert(VM_Version::supports_evex(), "");
2667   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2668   attributes.set_is_evex_instruction();
2669   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2670   emit_int8(0x6F);
2671   emit_int8((unsigned char)(0xC0 | encode));
2672 }
2673 
2674 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2675   assert(VM_Version::supports_evex(), "");
2676   InstructionMark im(this);
2677   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2678   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2679   attributes.set_is_evex_instruction();
2680   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2681   emit_int8(0x6F);
2682   emit_operand(dst, src);
2683 }
2684 
2685 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2686   assert(VM_Version::supports_evex(), "");
2687   assert(src != xnoreg, "sanity");
2688   InstructionMark im(this);
2689   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2690   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2691   attributes.reset_is_clear_context();
2692   attributes.set_is_evex_instruction();
2693   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2694   emit_int8(0x7F);
2695   emit_operand(src, dst);
2696 }
2697 
// On 64-bit, the 32-bit moves below zero-extend into the upper half of the destination register.
2699 
2700 void Assembler::movl(Register dst, int32_t imm32) {
2701   int encode = prefix_and_encode(dst->encoding());
2702   emit_int8((unsigned char)(0xB8 | encode));
2703   emit_int32(imm32);
2704 }
2705 
2706 void Assembler::movl(Register dst, Register src) {
2707   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2708   emit_int8((unsigned char)0x8B);
2709   emit_int8((unsigned char)(0xC0 | encode));
2710 }
2711 
2712 void Assembler::movl(Register dst, Address src) {
2713   InstructionMark im(this);
2714   prefix(src, dst);
2715   emit_int8((unsigned char)0x8B);
2716   emit_operand(dst, src);
2717 }
2718 
2719 void Assembler::movl(Address dst, int32_t imm32) {
2720   InstructionMark im(this);
2721   prefix(dst);
2722   emit_int8((unsigned char)0xC7);
2723   emit_operand(rax, dst, 4);
2724   emit_int32(imm32);
2725 }
2726 
2727 void Assembler::movl(Address dst, Register src) {
2728   InstructionMark im(this);
2729   prefix(dst, src);
2730   emit_int8((unsigned char)0x89);
2731   emit_operand(src, dst);
2732 }
2733 
// Newer CPUs require movsd and movss to avoid a partial register stall
// when loading from memory. But on the old Opteron, use movlpd instead of movsd.
2736 // The selection is done in MacroAssembler::movdbl() and movflt().
2737 void Assembler::movlpd(XMMRegister dst, Address src) {
2738   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2739   InstructionMark im(this);
2740   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2741   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2742   attributes.set_rex_vex_w_reverted();
2743   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2744   emit_int8(0x12);
2745   emit_operand(dst, src);
2746 }
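// The gist of that selection (in MacroAssembler::movdbl), roughly:
//   if (UseXmmLoadAndClearUpper) movsd (dst, src);  // also clears the upper half
//   else                         movlpd(dst, src);  // keeps the upper half; no stall on old Opteron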
2747 
void Assembler::movq(MMXRegister dst, Address src) {
  assert(VM_Version::supports_mmx(), "");
2750   emit_int8(0x0F);
2751   emit_int8(0x6F);
2752   emit_operand(dst, src);
2753 }
2754 
void Assembler::movq(Address dst, MMXRegister src) {
  assert(VM_Version::supports_mmx(), "");
2757   emit_int8(0x0F);
2758   emit_int8(0x7F);
  // Workaround for a gcc (3.2.1-7a) bug: with only an
  // emit_operand(MMX, Address) overload available, that gcc version would
  // tail-jump and try to reverse the parameters, completely obliterating
  // dst in the process. Having an overload that doesn't need to swap the
  // args at the tail jump avoids the bug.
2765   emit_operand(dst, src);
2766 }
2767 
2768 void Assembler::movq(XMMRegister dst, Address src) {
2769   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2770   InstructionMark im(this);
2771   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2772   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2773   attributes.set_rex_vex_w_reverted();
2774   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2775   emit_int8(0x7E);
2776   emit_operand(dst, src);
2777 }
2778 
2779 void Assembler::movq(Address dst, XMMRegister src) {
2780   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2781   InstructionMark im(this);
2782   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2783   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2784   attributes.set_rex_vex_w_reverted();
2785   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2786   emit_int8((unsigned char)0xD6);
2787   emit_operand(src, dst);
2788 }
2789 
2790 void Assembler::movsbl(Register dst, Address src) { // movsxb
2791   InstructionMark im(this);
2792   prefix(src, dst);
2793   emit_int8(0x0F);
2794   emit_int8((unsigned char)0xBE);
2795   emit_operand(dst, src);
2796 }
2797 
2798 void Assembler::movsbl(Register dst, Register src) { // movsxb
2799   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2800   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2801   emit_int8(0x0F);
2802   emit_int8((unsigned char)0xBE);
2803   emit_int8((unsigned char)(0xC0 | encode));
2804 }
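// Illustrative: the register form above, movsbl(rax, rbx), emits 0F BE C3
// (movsx eax, bl).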
2805 
2806 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2807   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2808   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2809   attributes.set_rex_vex_w_reverted();
2810   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2811   emit_int8(0x10);
2812   emit_int8((unsigned char)(0xC0 | encode));
2813 }
2814 
2815 void Assembler::movsd(XMMRegister dst, Address src) {
2816   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2817   InstructionMark im(this);
2818   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2819   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2820   attributes.set_rex_vex_w_reverted();
2821   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2822   emit_int8(0x10);
2823   emit_operand(dst, src);
2824 }
2825 
2826 void Assembler::movsd(Address dst, XMMRegister src) {
2827   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2828   InstructionMark im(this);
2829   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2830   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2831   attributes.reset_is_clear_context();
2832   attributes.set_rex_vex_w_reverted();
2833   simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2834   emit_int8(0x11);
2835   emit_operand(src, dst);
2836 }
2837 
2838 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2839   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2840   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2841   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2842   emit_int8(0x10);
2843   emit_int8((unsigned char)(0xC0 | encode));
2844 }
2845 
2846 void Assembler::movss(XMMRegister dst, Address src) {
2847   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2848   InstructionMark im(this);
2849   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2850   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2851   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2852   emit_int8(0x10);
2853   emit_operand(dst, src);
2854 }
2855 
2856 void Assembler::movss(Address dst, XMMRegister src) {
2857   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2858   InstructionMark im(this);
2859   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2860   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2861   attributes.reset_is_clear_context();
2862   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2863   emit_int8(0x11);
2864   emit_operand(src, dst);
2865 }
2866 
2867 void Assembler::movswl(Register dst, Address src) { // movsxw
2868   InstructionMark im(this);
2869   prefix(src, dst);
2870   emit_int8(0x0F);
2871   emit_int8((unsigned char)0xBF);
2872   emit_operand(dst, src);
2873 }
2874 
2875 void Assembler::movswl(Register dst, Register src) { // movsxw
2876   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2877   emit_int8(0x0F);
2878   emit_int8((unsigned char)0xBF);
2879   emit_int8((unsigned char)(0xC0 | encode));
2880 }
2881 
2882 void Assembler::movw(Address dst, int imm16) {
2883   InstructionMark im(this);
  emit_int8(0x66); // operand-size override prefix: 16-bit operand
2886   prefix(dst);
2887   emit_int8((unsigned char)0xC7);
2888   emit_operand(rax, dst, 2);
2889   emit_int16(imm16);
2890 }
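// Illustrative: movw(Address(rax, 0), 0x1234) above emits 66 C7 00 34 12.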
2891 
2892 void Assembler::movw(Register dst, Address src) {
2893   InstructionMark im(this);
2894   emit_int8(0x66);
2895   prefix(src, dst);
2896   emit_int8((unsigned char)0x8B);
2897   emit_operand(dst, src);
2898 }
2899 
2900 void Assembler::movw(Address dst, Register src) {
2901   InstructionMark im(this);
2902   emit_int8(0x66);
2903   prefix(dst, src);
2904   emit_int8((unsigned char)0x89);
2905   emit_operand(src, dst);
2906 }
2907 
2908 void Assembler::movzbl(Register dst, Address src) { // movzxb
2909   InstructionMark im(this);
2910   prefix(src, dst);
2911   emit_int8(0x0F);
2912   emit_int8((unsigned char)0xB6);
2913   emit_operand(dst, src);
2914 }
2915 
2916 void Assembler::movzbl(Register dst, Register src) { // movzxb
2917   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2918   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2919   emit_int8(0x0F);
2920   emit_int8((unsigned char)0xB6);
  emit_int8((unsigned char)(0xC0 | encode));
2922 }
2923 
2924 void Assembler::movzwl(Register dst, Address src) { // movzxw
2925   InstructionMark im(this);
2926   prefix(src, dst);
2927   emit_int8(0x0F);
2928   emit_int8((unsigned char)0xB7);
2929   emit_operand(dst, src);
2930 }
2931 
2932 void Assembler::movzwl(Register dst, Register src) { // movzxw
2933   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2934   emit_int8(0x0F);
2935   emit_int8((unsigned char)0xB7);
  emit_int8((unsigned char)(0xC0 | encode));
2937 }
2938 
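// F7 /4 is unsigned MUL: EDX:EAX = EAX * operand. The rsp argument and the
// 0xE0 base below merely encode the /4 opcode-extension digit, not a register.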
2939 void Assembler::mull(Address src) {
2940   InstructionMark im(this);
2941   prefix(src);
2942   emit_int8((unsigned char)0xF7);
2943   emit_operand(rsp, src);
2944 }
2945 
2946 void Assembler::mull(Register src) {
2947   int encode = prefix_and_encode(src->encoding());
2948   emit_int8((unsigned char)0xF7);
2949   emit_int8((unsigned char)(0xE0 | encode));
2950 }
2951 
2952 void Assembler::mulsd(XMMRegister dst, Address src) {
2953   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2954   InstructionMark im(this);
2955   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2956   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2957   attributes.set_rex_vex_w_reverted();
2958   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2959   emit_int8(0x59);
2960   emit_operand(dst, src);
2961 }
2962 
2963 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2964   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2965   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2966   attributes.set_rex_vex_w_reverted();
2967   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2968   emit_int8(0x59);
2969   emit_int8((unsigned char)(0xC0 | encode));
2970 }
2971 
2972 void Assembler::mulss(XMMRegister dst, Address src) {
2973   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2974   InstructionMark im(this);
2975   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2976   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2977   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2978   emit_int8(0x59);
2979   emit_operand(dst, src);
2980 }
2981 
2982 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2983   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2984   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2985   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2986   emit_int8(0x59);
2987   emit_int8((unsigned char)(0xC0 | encode));
2988 }
2989 
2990 void Assembler::negl(Register dst) {
2991   int encode = prefix_and_encode(dst->encoding());
2992   emit_int8((unsigned char)0xF7);
2993   emit_int8((unsigned char)(0xD8 | encode));
2994 }
2995 
2996 void Assembler::nop(int i) {
2997 #ifdef ASSERT
2998   assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers, making it a
  // pain to disassemble code while debugging. If asserts are on, speed is
  // clearly not an issue, so simply use the traditional single-byte nop
  // for alignment.
3003 
3004   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
3005   return;
3006 
3007 #endif // ASSERT
3008 
3009   if (UseAddressNop && VM_Version::is_intel()) {
3010     //
    // Using multi-byte nops "0x0F 0x1F [address]" for Intel
3012     //  1: 0x90
3013     //  2: 0x66 0x90
3014     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3015     //  4: 0x0F 0x1F 0x40 0x00
3016     //  5: 0x0F 0x1F 0x44 0x00 0x00
3017     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3018     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3019     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3020     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3021     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3022     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3023 
    // The remaining encoding is Intel-specific - don't use consecutive address nops
3025 
3026     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3027     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3028     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3029     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3030 
    while (i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
3033       i -= 15;
3034       emit_int8(0x66);   // size prefix
3035       emit_int8(0x66);   // size prefix
3036       emit_int8(0x66);   // size prefix
3037       addr_nop_8();
3038       emit_int8(0x66);   // size prefix
3039       emit_int8(0x66);   // size prefix
3040       emit_int8(0x66);   // size prefix
3041       emit_int8((unsigned char)0x90);
3042                          // nop
3043     }
3044     switch (i) {
3045       case 14:
3046         emit_int8(0x66); // size prefix
3047       case 13:
3048         emit_int8(0x66); // size prefix
3049       case 12:
3050         addr_nop_8();
3051         emit_int8(0x66); // size prefix
3052         emit_int8(0x66); // size prefix
3053         emit_int8(0x66); // size prefix
3054         emit_int8((unsigned char)0x90);
3055                          // nop
3056         break;
3057       case 11:
3058         emit_int8(0x66); // size prefix
3059       case 10:
3060         emit_int8(0x66); // size prefix
3061       case 9:
3062         emit_int8(0x66); // size prefix
3063       case 8:
3064         addr_nop_8();
3065         break;
3066       case 7:
3067         addr_nop_7();
3068         break;
3069       case 6:
3070         emit_int8(0x66); // size prefix
3071       case 5:
3072         addr_nop_5();
3073         break;
3074       case 4:
3075         addr_nop_4();
3076         break;
3077       case 3:
3078         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3079         emit_int8(0x66); // size prefix
3080       case 2:
3081         emit_int8(0x66); // size prefix
3082       case 1:
3083         emit_int8((unsigned char)0x90);
3084                          // nop
3085         break;
3086       default:
3087         assert(i == 0, " ");
3088     }
3089     return;
3090   }
3091   if (UseAddressNop && VM_Version::is_amd()) {
3092     //
    // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
3094     //  1: 0x90
3095     //  2: 0x66 0x90
3096     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3097     //  4: 0x0F 0x1F 0x40 0x00
3098     //  5: 0x0F 0x1F 0x44 0x00 0x00
3099     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3100     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3101     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3102     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3103     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3104     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3105 
    // The remaining encoding is AMD-specific - use consecutive address nops
3107 
3108     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3109     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3110     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3111     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3112     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3113     //     Size prefixes (0x66) are added for larger sizes
3114 
    while (i >= 22) {
3116       i -= 11;
3117       emit_int8(0x66); // size prefix
3118       emit_int8(0x66); // size prefix
3119       emit_int8(0x66); // size prefix
3120       addr_nop_8();
3121     }
    // Generate the first nop for sizes 12 through 21
3123     switch (i) {
3124       case 21:
3125         i -= 1;
3126         emit_int8(0x66); // size prefix
3127       case 20:
3128       case 19:
3129         i -= 1;
3130         emit_int8(0x66); // size prefix
3131       case 18:
3132       case 17:
3133         i -= 1;
3134         emit_int8(0x66); // size prefix
3135       case 16:
3136       case 15:
3137         i -= 8;
3138         addr_nop_8();
3139         break;
3140       case 14:
3141       case 13:
3142         i -= 7;
3143         addr_nop_7();
3144         break;
3145       case 12:
3146         i -= 6;
3147         emit_int8(0x66); // size prefix
3148         addr_nop_5();
3149         break;
3150       default:
3151         assert(i < 12, " ");
3152     }
3153 
    // Generate the second nop for sizes 1 through 11
3155     switch (i) {
3156       case 11:
3157         emit_int8(0x66); // size prefix
3158       case 10:
3159         emit_int8(0x66); // size prefix
3160       case 9:
3161         emit_int8(0x66); // size prefix
3162       case 8:
3163         addr_nop_8();
3164         break;
3165       case 7:
3166         addr_nop_7();
3167         break;
3168       case 6:
3169         emit_int8(0x66); // size prefix
3170       case 5:
3171         addr_nop_5();
3172         break;
3173       case 4:
3174         addr_nop_4();
3175         break;
3176       case 3:
3177         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3178         emit_int8(0x66); // size prefix
3179       case 2:
3180         emit_int8(0x66); // size prefix
3181       case 1:
3182         emit_int8((unsigned char)0x90);
3183                          // nop
3184         break;
3185       default:
3186         assert(i == 0, " ");
3187     }
3188     return;
3189   }
3190 
3191   if (UseAddressNop && VM_Version::is_zx()) {
3192     //
    // Using multi-byte nops "0x0F 0x1F [address]" for ZX
3194     //  1: 0x90
3195     //  2: 0x66 0x90
3196     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3197     //  4: 0x0F 0x1F 0x40 0x00
3198     //  5: 0x0F 0x1F 0x44 0x00 0x00
3199     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3200     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3201     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3202     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3203     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3204     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3205 
    // The remaining encoding is ZX-specific - don't use consecutive address nops
3207 
3208     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3209     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3210     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3211     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3212 
3213     while (i >= 15) {
      // For ZX don't generate consecutive address nops (mix with regular nops)
3215       i -= 15;
3216       emit_int8(0x66);   // size prefix
3217       emit_int8(0x66);   // size prefix
3218       emit_int8(0x66);   // size prefix
3219       addr_nop_8();
3220       emit_int8(0x66);   // size prefix
3221       emit_int8(0x66);   // size prefix
3222       emit_int8(0x66);   // size prefix
3223       emit_int8((unsigned char)0x90);
3224                          // nop
3225     }
3226     switch (i) {
3227       case 14:
3228         emit_int8(0x66); // size prefix
3229       case 13:
3230         emit_int8(0x66); // size prefix
3231       case 12:
3232         addr_nop_8();
3233         emit_int8(0x66); // size prefix
3234         emit_int8(0x66); // size prefix
3235         emit_int8(0x66); // size prefix
3236         emit_int8((unsigned char)0x90);
3237                          // nop
3238         break;
3239       case 11:
3240         emit_int8(0x66); // size prefix
3241       case 10:
3242         emit_int8(0x66); // size prefix
3243       case 9:
3244         emit_int8(0x66); // size prefix
3245       case 8:
3246         addr_nop_8();
3247         break;
3248       case 7:
3249         addr_nop_7();
3250         break;
3251       case 6:
3252         emit_int8(0x66); // size prefix
3253       case 5:
3254         addr_nop_5();
3255         break;
3256       case 4:
3257         addr_nop_4();
3258         break;
3259       case 3:
3260         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3261         emit_int8(0x66); // size prefix
3262       case 2:
3263         emit_int8(0x66); // size prefix
3264       case 1:
3265         emit_int8((unsigned char)0x90);
3266                          // nop
3267         break;
3268       default:
3269         assert(i == 0, " ");
3270     }
3271     return;
3272   }
3273 
3274   // Using nops with size prefixes "0x66 0x90".
3275   // From AMD Optimization Guide:
3276   //  1: 0x90
3277   //  2: 0x66 0x90
3278   //  3: 0x66 0x66 0x90
3279   //  4: 0x66 0x66 0x66 0x90
3280   //  5: 0x66 0x66 0x90 0x66 0x90
3281   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
3282   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
3283   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
3284   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3285   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3286   //
  while (i > 12) {
3288     i -= 4;
3289     emit_int8(0x66); // size prefix
3290     emit_int8(0x66);
3291     emit_int8(0x66);
3292     emit_int8((unsigned char)0x90);
3293                      // nop
3294   }
3295   // 1 - 12 nops
  if (i > 8) {
    if (i > 9) {
3298       i -= 1;
3299       emit_int8(0x66);
3300     }
3301     i -= 3;
3302     emit_int8(0x66);
3303     emit_int8(0x66);
3304     emit_int8((unsigned char)0x90);
3305   }
3306   // 1 - 8 nops
  if (i > 4) {
    if (i > 6) {
3309       i -= 1;
3310       emit_int8(0x66);
3311     }
3312     i -= 3;
3313     emit_int8(0x66);
3314     emit_int8(0x66);
3315     emit_int8((unsigned char)0x90);
3316   }
3317   switch (i) {
3318     case 4:
3319       emit_int8(0x66);
3320     case 3:
3321       emit_int8(0x66);
3322     case 2:
3323       emit_int8(0x66);
3324     case 1:
3325       emit_int8((unsigned char)0x90);
3326       break;
3327     default:
3328       assert(i == 0, " ");
3329   }
3330 }
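// Illustrative: in a product build with UseAddressNop on Intel, nop(5) emits a
// single address nop, 0F 1F 44 00 00 (see the tables above).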
3331 
3332 void Assembler::notl(Register dst) {
3333   int encode = prefix_and_encode(dst->encoding());
3334   emit_int8((unsigned char)0xF7);
3335   emit_int8((unsigned char)(0xD0 | encode));
3336 }
3337 
3338 void Assembler::orl(Address dst, int32_t imm32) {
3339   InstructionMark im(this);
3340   prefix(dst);
3341   emit_arith_operand(0x81, rcx, dst, imm32);
3342 }
3343 
3344 void Assembler::orl(Register dst, int32_t imm32) {
3345   prefix(dst);
3346   emit_arith(0x81, 0xC8, dst, imm32);
3347 }
3348 
3349 void Assembler::orl(Register dst, Address src) {
3350   InstructionMark im(this);
3351   prefix(src, dst);
3352   emit_int8(0x0B);
3353   emit_operand(dst, src);
3354 }
3355 
3356 void Assembler::orl(Register dst, Register src) {
3357   (void) prefix_and_encode(dst->encoding(), src->encoding());
3358   emit_arith(0x0B, 0xC0, dst, src);
3359 }
3360 
3361 void Assembler::orl(Address dst, Register src) {
3362   InstructionMark im(this);
3363   prefix(dst, src);
3364   emit_int8(0x09);
3365   emit_operand(src, dst);
3366 }
3367 
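// packuswb packs signed words into unsigned bytes with saturation: negative
// values clamp to 0x00 and values above 0xFF clamp to 0xFF.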
3368 void Assembler::packuswb(XMMRegister dst, Address src) {
3369   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3370   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3371   InstructionMark im(this);
3372   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3373   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3374   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3375   emit_int8(0x67);
3376   emit_operand(dst, src);
3377 }
3378 
3379 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
3380   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3381   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3382   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3383   emit_int8(0x67);
3384   emit_int8((unsigned char)(0xC0 | encode));
3385 }
3386 
3387 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3388   assert(UseAVX > 0, "some form of AVX must be enabled");
3389   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3390   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3391   emit_int8(0x67);
3392   emit_int8((unsigned char)(0xC0 | encode));
3393 }
3394 
3395 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3396   assert(VM_Version::supports_avx2(), "");
3397   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3398   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3399   emit_int8(0x00);
  emit_int8((unsigned char)(0xC0 | encode));
3401   emit_int8(imm8);
3402 }
3403 
void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3405   assert(VM_Version::supports_avx2(), "");
3406   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3407   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3408   emit_int8(0x46);
  emit_int8((unsigned char)(0xC0 | encode));
3410   emit_int8(imm8);
3411 }
3412 
3413 void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3414   assert(VM_Version::supports_avx(), "");
3415   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3416   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3417   emit_int8(0x06);
  emit_int8((unsigned char)(0xC0 | encode));
3419   emit_int8(imm8);
3420 }
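// For vperm2f128/vperm2i128, imm8 bits 1:0 select the source of dst's low
// 128-bit lane (0/1 = nds low/high, 2/3 = src low/high) and bits 5:4 select the
// high lane; e.g. vperm2f128(x, y, y, 0x01) swaps the two lanes of y.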
3421 
3422 void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3423   assert(VM_Version::supports_evex(), "");
3424   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3425   attributes.set_is_evex_instruction();
3426   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3427   emit_int8(0x76);
3428   emit_int8((unsigned char)(0xC0 | encode));
3429 }
3430 
3431 
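// Spin-wait hint, encoded as F3 90 (a REP-prefixed nop); behaves as a plain
// nop on CPUs that predate it.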
3432 void Assembler::pause() {
3433   emit_int8((unsigned char)0xF3);
3434   emit_int8((unsigned char)0x90);
3435 }
3436 
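// ud2 (0F 0B) raises an invalid-opcode fault (#UD); used to trap paths that
// must never be executed.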
3437 void Assembler::ud2() {
3438   emit_int8(0x0F);
3439   emit_int8(0x0B);
3440 }
3441 
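// pcmpestri implicitly takes the string lengths in EAX and EDX and returns the
// match index in ECX (setting flags accordingly).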
3442 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3443   assert(VM_Version::supports_sse4_2(), "");
3444   InstructionMark im(this);
3445   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3446   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3447   emit_int8(0x61);
3448   emit_operand(dst, src);
3449   emit_int8(imm8);
3450 }
3451 
3452 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3453   assert(VM_Version::supports_sse4_2(), "");
3454   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3455   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3456   emit_int8(0x61);
3457   emit_int8((unsigned char)(0xC0 | encode));
3458   emit_int8(imm8);
3459 }
3460 
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst.
3462 void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
3463   assert(VM_Version::supports_sse2(), "");
3464   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3465   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3466   emit_int8(0x74);
3467   emit_int8((unsigned char)(0xC0 | encode));
3468 }
3469 
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst.
3471 void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3472   assert(VM_Version::supports_avx(), "");
3473   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3474   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3475   emit_int8(0x74);
3476   emit_int8((unsigned char)(0xC0 | encode));
3477 }
3478 
// In this context, kdst receives the mask used to process the equal components.
3480 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3481   assert(VM_Version::supports_avx512bw(), "");
3482   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3483   attributes.set_is_evex_instruction();
3484   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3485   emit_int8(0x74);
3486   emit_int8((unsigned char)(0xC0 | encode));
3487 }
3488 
3489 void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3490   assert(VM_Version::supports_avx512vlbw(), "");
3491   InstructionMark im(this);
3492   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3493   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3494   attributes.set_is_evex_instruction();
3495   int dst_enc = kdst->encoding();
3496   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3497   emit_int8(0x64);
3498   emit_operand(as_Register(dst_enc), src);
3499 }
3500 
3501 void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3502   assert(is_vector_masking(), "");
3503   assert(VM_Version::supports_avx512vlbw(), "");
3504   InstructionMark im(this);
3505   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3506   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3507   attributes.reset_is_clear_context();
3508   attributes.set_embedded_opmask_register_specifier(mask);
3509   attributes.set_is_evex_instruction();
3510   int dst_enc = kdst->encoding();
3511   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3512   emit_int8(0x64);
3513   emit_operand(as_Register(dst_enc), src);
3514 }
3515 
3516 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3517   assert(VM_Version::supports_avx512vlbw(), "");
3518   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3519   attributes.set_is_evex_instruction();
3520   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3521   emit_int8(0x3E);
3522   emit_int8((unsigned char)(0xC0 | encode));
3523   emit_int8(vcc);
3524 }
3525 
3526 void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3527   assert(is_vector_masking(), "");
3528   assert(VM_Version::supports_avx512vlbw(), "");
3529   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3530   attributes.reset_is_clear_context();
3531   attributes.set_embedded_opmask_register_specifier(mask);
3532   attributes.set_is_evex_instruction();
3533   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3534   emit_int8(0x3E);
3535   emit_int8((unsigned char)(0xC0 | encode));
3536   emit_int8(vcc);
3537 }
3538 
3539 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
3540   assert(VM_Version::supports_avx512vlbw(), "");
3541   InstructionMark im(this);
3542   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3543   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3544   attributes.set_is_evex_instruction();
3545   int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3547   emit_int8(0x3E);
3548   emit_operand(as_Register(dst_enc), src);
3549   emit_int8(vcc);
3550 }
3551 
3552 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3553   assert(VM_Version::supports_avx512bw(), "");
3554   InstructionMark im(this);
3555   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3556   attributes.set_is_evex_instruction();
3557   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3558   int dst_enc = kdst->encoding();
3559   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3560   emit_int8(0x74);
3561   emit_operand(as_Register(dst_enc), src);
3562 }
3563 
3564 void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3565   assert(VM_Version::supports_avx512vlbw(), "");
3566   assert(is_vector_masking(), "");    // For stub code use only
3567   InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3569   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3570   attributes.reset_is_clear_context();
3571   attributes.set_embedded_opmask_register_specifier(mask);
3572   attributes.set_is_evex_instruction();
3573   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3574   emit_int8(0x74);
3575   emit_operand(as_Register(kdst->encoding()), src);
3576 }
3577 
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst.
3579 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
3580   assert(VM_Version::supports_sse2(), "");
3581   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3582   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3583   emit_int8(0x75);
3584   emit_int8((unsigned char)(0xC0 | encode));
3585 }
3586 
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst.
3588 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3589   assert(VM_Version::supports_avx(), "");
3590   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3591   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3592   emit_int8(0x75);
3593   emit_int8((unsigned char)(0xC0 | encode));
3594 }
3595 
// In this context, kdst receives the mask used to process the equal components.
3597 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3598   assert(VM_Version::supports_avx512bw(), "");
3599   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3600   attributes.set_is_evex_instruction();
3601   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3602   emit_int8(0x75);
3603   emit_int8((unsigned char)(0xC0 | encode));
3604 }
3605 
3606 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3607   assert(VM_Version::supports_avx512bw(), "");
3608   InstructionMark im(this);
3609   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3610   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3611   attributes.set_is_evex_instruction();
3612   int dst_enc = kdst->encoding();
3613   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3614   emit_int8(0x75);
3615   emit_operand(as_Register(dst_enc), src);
3616 }
3617 
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst.
3619 void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
3620   assert(VM_Version::supports_sse2(), "");
3621   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3622   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3623   emit_int8(0x76);
3624   emit_int8((unsigned char)(0xC0 | encode));
3625 }
3626 
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst.
3628 void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3629   assert(VM_Version::supports_avx(), "");
3630   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3631   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3632   emit_int8(0x76);
3633   emit_int8((unsigned char)(0xC0 | encode));
3634 }
3635 
// In this context, kdst receives the mask used to process the equal components.
3637 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3638   assert(VM_Version::supports_evex(), "");
3639   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3640   attributes.set_is_evex_instruction();
3641   attributes.reset_is_clear_context();
3642   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3643   emit_int8(0x76);
3644   emit_int8((unsigned char)(0xC0 | encode));
3645 }
3646 
3647 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3648   assert(VM_Version::supports_evex(), "");
3649   InstructionMark im(this);
3650   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3651   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3652   attributes.reset_is_clear_context();
3653   attributes.set_is_evex_instruction();
3654   int dst_enc = kdst->encoding();
3655   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3656   emit_int8(0x76);
3657   emit_operand(as_Register(dst_enc), src);
3658 }
3659 
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst.
3661 void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
3662   assert(VM_Version::supports_sse4_1(), "");
3663   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3664   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3665   emit_int8(0x29);
3666   emit_int8((unsigned char)(0xC0 | encode));
3667 }
3668 
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst.
3670 void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3671   assert(VM_Version::supports_avx(), "");
3672   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3673   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3674   emit_int8(0x29);
3675   emit_int8((unsigned char)(0xC0 | encode));
3676 }
3677 
// In this context, kdst receives the mask used to process the equal components.
3679 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3680   assert(VM_Version::supports_evex(), "");
3681   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3682   attributes.reset_is_clear_context();
3683   attributes.set_is_evex_instruction();
3684   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3685   emit_int8(0x29);
3686   emit_int8((unsigned char)(0xC0 | encode));
3687 }
3688 
// In this context, kdst receives the mask used to process the equal components.
3690 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3691   assert(VM_Version::supports_evex(), "");
3692   InstructionMark im(this);
3693   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3694   attributes.reset_is_clear_context();
3695   attributes.set_is_evex_instruction();
3696   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
3697   int dst_enc = kdst->encoding();
3698   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3699   emit_int8(0x29);
3700   emit_operand(as_Register(dst_enc), src);
3701 }
3702 
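// pmovmskb/vpmovmskb gather the most significant bit of each byte of src into
// the low bits of dst (16 bits for an XMM source, 32 for a YMM source).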
3703 void Assembler::pmovmskb(Register dst, XMMRegister src) {
3704   assert(VM_Version::supports_sse2(), "");
3705   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3706   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3707   emit_int8((unsigned char)0xD7);
3708   emit_int8((unsigned char)(0xC0 | encode));
3709 }
3710 
3711 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
3712   assert(VM_Version::supports_avx2(), "");
3713   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3714   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3715   emit_int8((unsigned char)0xD7);
3716   emit_int8((unsigned char)(0xC0 | encode));
3717 }
3718 
3719 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3720   assert(VM_Version::supports_sse4_1(), "");
3721   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3722   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3723   emit_int8(0x16);
3724   emit_int8((unsigned char)(0xC0 | encode));
3725   emit_int8(imm8);
3726 }
3727 
3728 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
3729   assert(VM_Version::supports_sse4_1(), "");
3730   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3731   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3732   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3733   emit_int8(0x16);
3734   emit_operand(src, dst);
3735   emit_int8(imm8);
3736 }
3737 
3738 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
3739   assert(VM_Version::supports_sse4_1(), "");
3740   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3741   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3742   emit_int8(0x16);
3743   emit_int8((unsigned char)(0xC0 | encode));
3744   emit_int8(imm8);
3745 }
3746 
3747 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
3748   assert(VM_Version::supports_sse4_1(), "");
3749   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3750   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3751   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3752   emit_int8(0x16);
3753   emit_operand(src, dst);
3754   emit_int8(imm8);
3755 }
3756 
3757 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
3758   assert(VM_Version::supports_sse2(), "");
3759   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3760   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3761   emit_int8((unsigned char)0xC5);
3762   emit_int8((unsigned char)(0xC0 | encode));
3763   emit_int8(imm8);
3764 }
3765 
3766 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
3767   assert(VM_Version::supports_sse4_1(), "");
3768   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3769   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3770   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3771   emit_int8((unsigned char)0x15);
3772   emit_operand(src, dst);
3773   emit_int8(imm8);
3774 }
3775 
3776 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
3777   assert(VM_Version::supports_sse4_1(), "");
3778   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3779   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3780   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3781   emit_int8(0x14);
3782   emit_operand(src, dst);
3783   emit_int8(imm8);
3784 }
3785 
3786 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
3787   assert(VM_Version::supports_sse4_1(), "");
3788   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3789   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3790   emit_int8(0x22);
3791   emit_int8((unsigned char)(0xC0 | encode));
3792   emit_int8(imm8);
3793 }
3794 
3795 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
3796   assert(VM_Version::supports_sse4_1(), "");
3797   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3798   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3799   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3800   emit_int8(0x22);
  emit_operand(dst, src);
3802   emit_int8(imm8);
3803 }
3804 
3805 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
3806   assert(VM_Version::supports_sse4_1(), "");
3807   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3808   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3809   emit_int8(0x22);
3810   emit_int8((unsigned char)(0xC0 | encode));
3811   emit_int8(imm8);
3812 }
3813 
3814 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
3815   assert(VM_Version::supports_sse4_1(), "");
3816   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3817   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3818   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3819   emit_int8(0x22);
3820   emit_operand(dst, src);
3821   emit_int8(imm8);
3822 }
3823 
3824 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
3825   assert(VM_Version::supports_sse2(), "");
3826   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3827   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3828   emit_int8((unsigned char)0xC4);
3829   emit_int8((unsigned char)(0xC0 | encode));
3830   emit_int8(imm8);
3831 }
3832 
3833 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
3834   assert(VM_Version::supports_sse2(), "");
3835   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3836   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3837   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3838   emit_int8((unsigned char)0xC4);
3839   emit_operand(dst, src);
3840   emit_int8(imm8);
3841 }
3842 
3843 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
3844   assert(VM_Version::supports_sse4_1(), "");
3845   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3846   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3847   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3848   emit_int8(0x20);
3849   emit_operand(dst, src);
3850   emit_int8(imm8);
3851 }
3852 
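// pmovzxbw zero-extends the low 8 bytes of the source to 8 words.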
3853 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
3854   assert(VM_Version::supports_sse4_1(), "");
3855   InstructionMark im(this);
3856   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3857   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3858   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3859   emit_int8(0x30);
3860   emit_operand(dst, src);
3861 }
3862 
3863 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
3864   assert(VM_Version::supports_sse4_1(), "");
3865   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3866   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3867   emit_int8(0x30);
3868   emit_int8((unsigned char)(0xC0 | encode));
3869 }
3870 
3871 void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3872   assert(VM_Version::supports_avx(), "");
3873   InstructionMark im(this);
3874   assert(dst != xnoreg, "sanity");
3875   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3876   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3877   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3878   emit_int8(0x30);
3879   emit_operand(dst, src);
3880 }
3881 
3882 void Assembler::vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
         vector_len == AVX_256bit ? VM_Version::supports_avx2() :
         vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : 0, "");
3886   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3887   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3888   emit_int8(0x30);
3889   emit_int8((unsigned char) (0xC0 | encode));
3890 }
3891 
3893 void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
3894   assert(is_vector_masking(), "");
3895   assert(VM_Version::supports_avx512vlbw(), "");
3896   assert(dst != xnoreg, "sanity");
3897   InstructionMark im(this);
3898   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3899   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3900   attributes.set_embedded_opmask_register_specifier(mask);
3901   attributes.set_is_evex_instruction();
3902   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3903   emit_int8(0x30);
3904   emit_operand(dst, src);
3905 }

void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
3907   assert(VM_Version::supports_avx512vlbw(), "");
3908   assert(src != xnoreg, "sanity");
3909   InstructionMark im(this);
3910   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3911   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3912   attributes.set_is_evex_instruction();
3913   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3914   emit_int8(0x30);
3915   emit_operand(src, dst);
3916 }
3917 
3918 void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
3919   assert(is_vector_masking(), "");
3920   assert(VM_Version::supports_avx512vlbw(), "");
3921   assert(src != xnoreg, "sanity");
3922   InstructionMark im(this);
3923   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3924   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3925   attributes.reset_is_clear_context();
3926   attributes.set_embedded_opmask_register_specifier(mask);
3927   attributes.set_is_evex_instruction();
3928   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3929   emit_int8(0x30);
3930   emit_operand(src, dst);
3931 }
3932 
3933 void Assembler::evpmovdb(Address dst, XMMRegister src, int vector_len) {
3934   assert(VM_Version::supports_evex(), "");
3935   assert(src != xnoreg, "sanity");
3936   InstructionMark im(this);
3937   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3938   attributes.set_address_attributes(/* tuple_type */ EVEX_QVM, /* input_size_in_bits */ EVEX_NObit);
3939   attributes.set_is_evex_instruction();
3940   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3941   emit_int8(0x31);
3942   emit_operand(src, dst);
3943 }
3944 
3945 void Assembler::vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len) {
3946   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
3947          vector_len == AVX_256bit? VM_Version::supports_avx2() :
3948          vector_len == AVX_512bit? VM_Version::supports_evex() : 0, "");
3949   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3950   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3951   emit_int8(0x33);
3952   emit_int8((unsigned char)(0xC0 | encode));
3953 }
3954 
3955 // generic
3956 void Assembler::pop(Register dst) {
3957   int encode = prefix_and_encode(dst->encoding());
3958   emit_int8(0x58 | encode);
3959 }
3960 
3961 void Assembler::popcntl(Register dst, Address src) {
3962   assert(VM_Version::supports_popcnt(), "must support");
3963   InstructionMark im(this);
3964   emit_int8((unsigned char)0xF3);
3965   prefix(src, dst);
3966   emit_int8(0x0F);
3967   emit_int8((unsigned char)0xB8);
3968   emit_operand(dst, src);
3969 }
3970 
3971 void Assembler::popcntl(Register dst, Register src) {
3972   assert(VM_Version::supports_popcnt(), "must support");
3973   emit_int8((unsigned char)0xF3);
3974   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3975   emit_int8(0x0F);
3976   emit_int8((unsigned char)0xB8);
3977   emit_int8((unsigned char)(0xC0 | encode));
3978 }
3979 
3980 void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
3981   assert(VM_Version::supports_vpopcntdq(), "must support vpopcntdq feature");
3982   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3983   attributes.set_is_evex_instruction();
3984   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3985   emit_int8(0x55);
3986   emit_int8((unsigned char)(0xC0 | encode));
3987 }
3988 
3989 void Assembler::popf() {
3990   emit_int8((unsigned char)0x9D);
3991 }
3992 
3993 #ifndef _LP64 // no 32bit push/pop on amd64
3994 void Assembler::popl(Address dst) {
3995   // NOTE: on 64-bit, pop adjusts the stack by 8 bytes, so this is 32-bit only
3996   InstructionMark im(this);
3997   prefix(dst);
3998   emit_int8((unsigned char)0x8F);
3999   emit_operand(rax, dst);
4000 }
4001 #endif
4002 
4003 void Assembler::prefetch_prefix(Address src) {
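       // all prefetch variants share this two-byte 0F opcode escape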
4004   prefix(src);
4005   emit_int8(0x0F);
4006 }
4007 
4008 void Assembler::prefetchnta(Address src) {
4009   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4010   InstructionMark im(this);
4011   prefetch_prefix(src);
4012   emit_int8(0x18);
4013   emit_operand(rax, src); // 0, src
4014 }
4015 
4016 void Assembler::prefetchr(Address src) {
4017   assert(VM_Version::supports_3dnow_prefetch(), "must support");
4018   InstructionMark im(this);
4019   prefetch_prefix(src);
4020   emit_int8(0x0D);
4021   emit_operand(rax, src); // 0, src
4022 }
4023 
4024 void Assembler::prefetcht0(Address src) {
4025   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4026   InstructionMark im(this);
4027   prefetch_prefix(src);
4028   emit_int8(0x18);
4029   emit_operand(rcx, src); // 1, src
4030 }
4031 
4032 void Assembler::prefetcht1(Address src) {
4033   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4034   InstructionMark im(this);
4035   prefetch_prefix(src);
4036   emit_int8(0x18);
4037   emit_operand(rdx, src); // 2, src
4038 }
4039 
4040 void Assembler::prefetcht2(Address src) {
4041   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4042   InstructionMark im(this);
4043   prefetch_prefix(src);
4044   emit_int8(0x18);
4045   emit_operand(rbx, src); // 3, src
4046 }
4047 
4048 void Assembler::prefetchw(Address src) {
4049   assert(VM_Version::supports_3dnow_prefetch(), "must support");
4050   InstructionMark im(this);
4051   prefetch_prefix(src);
4052   emit_int8(0x0D);
4053   emit_operand(rcx, src); // 1, src
4054 }
4055 
4056 void Assembler::prefix(Prefix p) {
4057   emit_int8(p);
4058 }
4059 
4060 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
4061   assert(VM_Version::supports_ssse3(), "");
4062   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4063   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4064   emit_int8(0x00);
4065   emit_int8((unsigned char)(0xC0 | encode));
4066 }
4067 
4068 void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4069   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4070          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4071          0, "");
4072   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4073   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4074   emit_int8(0x00);
4075   emit_int8((unsigned char)(0xC0 | encode));
4076 }
4077 
4078 void Assembler::pshufb(XMMRegister dst, Address src) {
4079   assert(VM_Version::supports_ssse3(), "");
4080   InstructionMark im(this);
4081   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4082   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4083   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4084   emit_int8(0x00);
4085   emit_operand(dst, src);
4086 }
4087 
4088 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
4089   assert(isByte(mode), "invalid value");
4090   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4091   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
4092   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4093   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4094   emit_int8(0x70);
4095   emit_int8((unsigned char)(0xC0 | encode));
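       // mode packs four 2-bit source-dword selectors, one per destination dword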
4096   emit_int8(mode & 0xFF);
4097 }
4098 
4099 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
4100   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4101          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4102          0, "");
4103   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4104   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4105   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4106   emit_int8(0x70);
4107   emit_int8((unsigned char)(0xC0 | encode));
4108   emit_int8(mode & 0xFF);
4109 }
4110 
4111 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
4112   assert(isByte(mode), "invalid value");
4113   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4114   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4115   InstructionMark im(this);
4116   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4117   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4118   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4119   emit_int8(0x70);
4120   emit_operand(dst, src);
4121   emit_int8(mode & 0xFF);
4122 }
4123 
4124 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
4125   assert(isByte(mode), "invalid value");
4126   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4127   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4128   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4129   emit_int8(0x70);
4130   emit_int8((unsigned char)(0xC0 | encode));
4131   emit_int8(mode & 0xFF);
4132 }
4133 
4134 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
4135   assert(isByte(mode), "invalid value");
4136   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4137   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4138   InstructionMark im(this);
4139   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4140   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4141   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4142   emit_int8(0x70);
4143   emit_operand(dst, src);
4144   emit_int8(mode & 0xFF);
4145 }

4146 void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4147   assert(VM_Version::supports_evex(), "requires EVEX support");
4148   assert(vector_len == Assembler::AVX_256bit || vector_len == Assembler::AVX_512bit, "");
4149   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4150   attributes.set_is_evex_instruction();
4151   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4152   emit_int8(0x43);
4153   emit_int8((unsigned char)(0xC0 | encode));
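       // imm8 picks 128-bit lanes: the low half of dst comes from nds, the high half from src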
4154   emit_int8(imm8 & 0xFF);
4155 }
4156 
4157 void Assembler::psrldq(XMMRegister dst, int shift) {
4158   // Shift right 128 bit value in dst XMMRegister by shift number of bytes.
4159   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4160   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
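       // XMM3 is for /3 encoding: 66 0F 73 /3 ib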
4161   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4162   emit_int8(0x73);
4163   emit_int8((unsigned char)(0xC0 | encode));
4164   emit_int8(shift);
4165 }
4166 
4167 void Assembler::pslldq(XMMRegister dst, int shift) {
4168   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
4169   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4170   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4171   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
4172   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4173   emit_int8(0x73);
4174   emit_int8((unsigned char)(0xC0 | encode));
4175   emit_int8(shift);
4176 }
4177 
4178 void Assembler::ptest(XMMRegister dst, Address src) {
4179   assert(VM_Version::supports_sse4_1(), "");
4180   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4181   InstructionMark im(this);
4182   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4183   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4184   emit_int8(0x17);
4185   emit_operand(dst, src);
4186 }
4187 
4188 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
4189   assert(VM_Version::supports_sse4_1(), "");
4190   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4191   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4192   emit_int8(0x17);
4193   emit_int8((unsigned char)(0xC0 | encode));
4194 }
4195 
4196 void Assembler::vptest(XMMRegister dst, Address src) {
4197   assert(VM_Version::supports_avx(), "");
4198   InstructionMark im(this);
4199   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4200   assert(dst != xnoreg, "sanity");
4201   // swap src<->dst for encoding
4202   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4203   emit_int8(0x17);
4204   emit_operand(dst, src);
4205 }
4206 
4207 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
4208   assert(VM_Version::supports_avx(), "");
4209   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4210   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4211   emit_int8(0x17);
4212   emit_int8((unsigned char)(0xC0 | encode));
4213 }
4214 
4215 void Assembler::punpcklbw(XMMRegister dst, Address src) {
4216   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4217   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4218   InstructionMark im(this);
4219   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4220   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4221   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4222   emit_int8(0x60);
4223   emit_operand(dst, src);
4224 }
4225 
4226 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
4227   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4228   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4229   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4230   emit_int8(0x60);
4231   emit_int8((unsigned char)(0xC0 | encode));
4232 }
4233 
4234 void Assembler::punpckldq(XMMRegister dst, Address src) {
4235   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4236   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4237   InstructionMark im(this);
4238   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4239   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4240   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4241   emit_int8(0x62);
4242   emit_operand(dst, src);
4243 }
4244 
4245 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
4246   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4247   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4248   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4249   emit_int8(0x62);
4250   emit_int8((unsigned char)(0xC0 | encode));
4251 }
4252 
4253 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
4254   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4255   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4256   attributes.set_rex_vex_w_reverted();
4257   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4258   emit_int8(0x6C);
4259   emit_int8((unsigned char)(0xC0 | encode));
4260 }
4261 
4262 void Assembler::push(int32_t imm32) {
4263   // in 64-bit mode we push 64 bits onto the stack, but the
4264   // immediate is only 32 bits and is sign-extended
4265   emit_int8(0x68);
4266   emit_int32(imm32);
4267 }
4268 
4269 void Assembler::push(Register src) {
4270   int encode = prefix_and_encode(src->encoding());
4271 
4272   emit_int8(0x50 | encode);
4273 }
4274 
4275 void Assembler::pushf() {
4276   emit_int8((unsigned char)0x9C);
4277 }
4278 
4279 #ifndef _LP64 // no 32bit push/pop on amd64
4280 void Assembler::pushl(Address src) {
4281   // NOTE: on 64-bit, push adjusts the stack by 8 bytes, so this is 32-bit only
4282   InstructionMark im(this);
4283   prefix(src);
4284   emit_int8((unsigned char)0xFF);
4285   emit_operand(rsi, src);
4286 }
4287 #endif
4288 
4289 void Assembler::rcll(Register dst, int imm8) {
4290   assert(isShiftCount(imm8), "illegal shift count");
4291   int encode = prefix_and_encode(dst->encoding());
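       // RCL is /2: D1 /2 rotates by one, C1 /2 ib rotates by imm8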
4292   if (imm8 == 1) {
4293     emit_int8((unsigned char)0xD1);
4294     emit_int8((unsigned char)(0xD0 | encode));
4295   } else {
4296     emit_int8((unsigned char)0xC1);
4297     emit_int8((unsigned char)(0xD0 | encode));
4298     emit_int8(imm8);
4299   }
4300 }
4301 
4302 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
4303   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4304   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4305   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4306   emit_int8(0x53);
4307   emit_int8((unsigned char)(0xC0 | encode));
4308 }
4309 
4310 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
4311   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4312   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4313   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4314   emit_int8(0x53);
4315   emit_int8((unsigned char)(0xC0 | encode));
4316 }
4317 
4318 void Assembler::rdtsc() {
4319   emit_int8((unsigned char)0x0F);
4320   emit_int8((unsigned char)0x31);
4321 }
4322 
4323 // copies rcx pointer-sized words from [esi] to [edi]
4324 // generic
4325 void Assembler::rep_mov() {
4326   emit_int8((unsigned char)0xF3);
4327   // MOVSQ
4328   LP64_ONLY(prefix(REX_W));
4329   emit_int8((unsigned char)0xA5);
4330 }
4331 
4332 // sets rcx bytes at [edi] to the byte value in rax
4333 void Assembler::rep_stosb() {
4334   emit_int8((unsigned char)0xF3); // REP
4335   LP64_ONLY(prefix(REX_W));
4336   emit_int8((unsigned char)0xAA); // STOSB
4337 }
4338 
4339 // sets rcx pointer-sized words at [edi] to the value in rax
4340 // generic
4341 void Assembler::rep_stos() {
4342   emit_int8((unsigned char)0xF3); // REP
4343   LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
4344   emit_int8((unsigned char)0xAB);
4345 }
4346 
4347 // scans rcx pointer-sized words at [edi] for an occurrence of rax
4348 // generic
4349 void Assembler::repne_scan() {
4350   emit_int8((unsigned char)0xF2);
4351   // SCASQ
4352   LP64_ONLY(prefix(REX_W));
4353   emit_int8((unsigned char)0xAF);
4354 }
4355 
4356 #ifdef _LP64
4357 // scans rcx 4-byte words at [edi] for an occurrence of rax
4358 // generic
4359 void Assembler::repne_scanl() {
4360   emit_int8((unsigned char)0xF2);
4361   // SCASL
4362   emit_int8((unsigned char)0xAF);
4363 }
4364 #endif
4365 
4366 void Assembler::ret(int imm16) {
4367   if (imm16 == 0) {
4368     emit_int8((unsigned char)0xC3);
4369   } else {
4370     emit_int8((unsigned char)0xC2);
4371     emit_int16(imm16);
4372   }
4373 }
4374 
4375 void Assembler::sahf() {
4376 #ifdef _LP64
4377   // Not supported in 64bit mode
4378   ShouldNotReachHere();
4379 #endif
4380   emit_int8((unsigned char)0x9E);
4381 }
4382 
4383 void Assembler::sarl(Register dst, int imm8) {
4384   assert(isShiftCount(imm8), "illegal shift count");
4385   int encode = prefix_and_encode(dst->encoding());
4386   if (imm8 == 1) {
4387     emit_int8((unsigned char)0xD1);
4388     emit_int8((unsigned char)(0xF8 | encode));
4389   } else {
4390     emit_int8((unsigned char)0xC1);
4391     emit_int8((unsigned char)(0xF8 | encode));
4392     emit_int8(imm8);
4393   }
4394 }
4395 
4396 void Assembler::sarl(Register dst) {
4397   int encode = prefix_and_encode(dst->encoding());
4398   emit_int8((unsigned char)0xD3);
4399   emit_int8((unsigned char)(0xF8 | encode));
4400 }
4401 
4402 void Assembler::sbbl(Address dst, int32_t imm32) {
4403   InstructionMark im(this);
4404   prefix(dst);
4405   emit_arith_operand(0x81, rbx, dst, imm32);
4406 }
4407 
4408 void Assembler::sbbl(Register dst, int32_t imm32) {
4409   prefix(dst);
4410   emit_arith(0x81, 0xD8, dst, imm32);
4411 }
4412 
4414 void Assembler::sbbl(Register dst, Address src) {
4415   InstructionMark im(this);
4416   prefix(src, dst);
4417   emit_int8(0x1B);
4418   emit_operand(dst, src);
4419 }
4420 
4421 void Assembler::sbbl(Register dst, Register src) {
4422   (void) prefix_and_encode(dst->encoding(), src->encoding());
4423   emit_arith(0x1B, 0xC0, dst, src);
4424 }
4425 
4426 void Assembler::setb(Condition cc, Register dst) {
4427   assert(0 <= cc && cc < 16, "illegal cc");
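       // 'true' marks a byte-register operand so a REX prefix is emitted when needed (spl..dil)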
4428   int encode = prefix_and_encode(dst->encoding(), true);
4429   emit_int8(0x0F);
4430   emit_int8((unsigned char)(0x90 | cc));
4431   emit_int8((unsigned char)(0xC0 | encode));
4432 }
4433 
4434 void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
4435   assert(VM_Version::supports_ssse3(), "");
4436   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
4437   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4438   emit_int8((unsigned char)0x0F);
4439   emit_int8((unsigned char)(0xC0 | encode));
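       // imm8 is the byte shift applied to the concatenation dst:src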
4440   emit_int8(imm8);
4441 }
4442 
4443 void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4444   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4445          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4446          0, "");
4447   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
4448   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4449   emit_int8((unsigned char)0x0F);
4450   emit_int8((unsigned char)(0xC0 | encode));
4451   emit_int8(imm8);
4452 }
4453 
4454 void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
4455   assert(VM_Version::supports_sse4_1(), "");
4456   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4457   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4458   emit_int8((unsigned char)0x0E);
4459   emit_int8((unsigned char)(0xC0 | encode));
4460   emit_int8(imm8);
4461 }
4462 
4463 void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
4464   assert(VM_Version::supports_sha(), "");
4465   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
4466   emit_int8((unsigned char)0xCC);
4467   emit_int8((unsigned char)(0xC0 | encode));
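       // imm8 (0..3) selects the SHA-1 round function and constant K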
4468   emit_int8((unsigned char)imm8);
4469 }
4470 
4471 void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
4472   assert(VM_Version::supports_sha(), "");
4473   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4474   emit_int8((unsigned char)0xC8);
4475   emit_int8((unsigned char)(0xC0 | encode));
4476 }
4477 
4478 void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
4479   assert(VM_Version::supports_sha(), "");
4480   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4481   emit_int8((unsigned char)0xC9);
4482   emit_int8((unsigned char)(0xC0 | encode));
4483 }
4484 
4485 void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
4486   assert(VM_Version::supports_sha(), "");
4487   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4488   emit_int8((unsigned char)0xCA);
4489   emit_int8((unsigned char)(0xC0 | encode));
4490 }
4491 
4492 // xmm0 is implicit additional source to this instruction.
4493 void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
4494   assert(VM_Version::supports_sha(), "");
4495   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4496   emit_int8((unsigned char)0xCB);
4497   emit_int8((unsigned char)(0xC0 | encode));
4498 }
4499 
4500 void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
4501   assert(VM_Version::supports_sha(), "");
4502   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4503   emit_int8((unsigned char)0xCC);
4504   emit_int8((unsigned char)(0xC0 | encode));
4505 }
4506 
4507 void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
4508   assert(VM_Version::supports_sha(), "");
4509   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4510   emit_int8((unsigned char)0xCD);
4511   emit_int8((unsigned char)(0xC0 | encode));
4512 }
4513 
4515 void Assembler::shll(Register dst, int imm8) {
4516   assert(isShiftCount(imm8), "illegal shift count");
4517   int encode = prefix_and_encode(dst->encoding());
4518   if (imm8 == 1) {
4519     emit_int8((unsigned char)0xD1);
4520     emit_int8((unsigned char)(0xE0 | encode));
4521   } else {
4522     emit_int8((unsigned char)0xC1);
4523     emit_int8((unsigned char)(0xE0 | encode));
4524     emit_int8(imm8);
4525   }
4526 }
4527 
4528 void Assembler::shll(Register dst) {
4529   int encode = prefix_and_encode(dst->encoding());
4530   emit_int8((unsigned char)0xD3);
4531   emit_int8((unsigned char)(0xE0 | encode));
4532 }
4533 
4534 void Assembler::shrl(Register dst, int imm8) {
4535   assert(isShiftCount(imm8), "illegal shift count");
4536   int encode = prefix_and_encode(dst->encoding());
4537   emit_int8((unsigned char)0xC1);
4538   emit_int8((unsigned char)(0xE8 | encode));
4539   emit_int8(imm8);
4540 }
4541 
4542 void Assembler::shrl(Register dst) {
4543   int encode = prefix_and_encode(dst->encoding());
4544   emit_int8((unsigned char)0xD3);
4545   emit_int8((unsigned char)(0xE8 | encode));
4546 }
4547 
4548 // copies a single 32-bit word from [esi] to [edi]
4549 void Assembler::smovl() {
4550   emit_int8((unsigned char)0xA5);
4551 }
4552 
4553 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
4554   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4555   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4556   attributes.set_rex_vex_w_reverted();
4557   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4558   emit_int8(0x51);
4559   emit_int8((unsigned char)(0xC0 | encode));
4560 }
4561 
4562 void Assembler::sqrtsd(XMMRegister dst, Address src) {
4563   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4564   InstructionMark im(this);
4565   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4566   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4567   attributes.set_rex_vex_w_reverted();
4568   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4569   emit_int8(0x51);
4570   emit_operand(dst, src);
4571 }
4572 
4573 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
4574   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4575   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4576   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4577   emit_int8(0x51);
4578   emit_int8((unsigned char)(0xC0 | encode));
4579 }
4580 
4581 void Assembler::std() {
4582   emit_int8((unsigned char)0xFD);
4583 }
4584 
4585 void Assembler::sqrtss(XMMRegister dst, Address src) {
4586   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4587   InstructionMark im(this);
4588   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4589   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4590   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4591   emit_int8(0x51);
4592   emit_operand(dst, src);
4593 }
4594 
4595 void Assembler::stmxcsr(Address dst) {
4596   if (UseAVX > 0) {
4597     assert(VM_Version::supports_avx(), "");
4598     InstructionMark im(this);
4599     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4600     vex_prefix(dst, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4601     emit_int8((unsigned char)0xAE);
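         // /3 selects STMXCSR within the 0F AE group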
4602     emit_operand(as_Register(3), dst);
4603   } else {
4604     NOT_LP64(assert(VM_Version::supports_sse(), ""));
4605     InstructionMark im(this);
4606     prefix(dst);
4607     emit_int8(0x0F);
4608     emit_int8((unsigned char)0xAE);
4609     emit_operand(as_Register(3), dst);
4610   }
4611 }
4612 
4613 void Assembler::subl(Address dst, int32_t imm32) {
4614   InstructionMark im(this);
4615   prefix(dst);
4616   emit_arith_operand(0x81, rbp, dst, imm32);
4617 }
4618 
4619 void Assembler::subl(Address dst, Register src) {
4620   InstructionMark im(this);
4621   prefix(dst, src);
4622   emit_int8(0x29);
4623   emit_operand(src, dst);
4624 }
4625 
4626 void Assembler::subl(Register dst, int32_t imm32) {
4627   prefix(dst);
4628   emit_arith(0x81, 0xE8, dst, imm32);
4629 }
4630 
4631 // Force generation of a 4-byte immediate value even if it fits into 8 bits
4632 void Assembler::subl_imm32(Register dst, int32_t imm32) {
4633   prefix(dst);
4634   emit_arith_imm32(0x81, 0xE8, dst, imm32);
4635 }
4636 
4637 void Assembler::subl(Register dst, Address src) {
4638   InstructionMark im(this);
4639   prefix(src, dst);
4640   emit_int8(0x2B);
4641   emit_operand(dst, src);
4642 }
4643 
4644 void Assembler::subl(Register dst, Register src) {
4645   (void) prefix_and_encode(dst->encoding(), src->encoding());
4646   emit_arith(0x2B, 0xC0, dst, src);
4647 }
4648 
4649 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
4650   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4651   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4652   attributes.set_rex_vex_w_reverted();
4653   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4654   emit_int8(0x5C);
4655   emit_int8((unsigned char)(0xC0 | encode));
4656 }
4657 
4658 void Assembler::subsd(XMMRegister dst, Address src) {
4659   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4660   InstructionMark im(this);
4661   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4662   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4663   attributes.set_rex_vex_w_reverted();
4664   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4665   emit_int8(0x5C);
4666   emit_operand(dst, src);
4667 }
4668 
4669 void Assembler::subss(XMMRegister dst, XMMRegister src) {
4670   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4671   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4672   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4673   emit_int8(0x5C);
4674   emit_int8((unsigned char)(0xC0 | encode));
4675 }
4676 
4677 void Assembler::subss(XMMRegister dst, Address src) {
4678   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4679   InstructionMark im(this);
4680   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4681   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4682   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4683   emit_int8(0x5C);
4684   emit_operand(dst, src);
4685 }
4686 
4687 void Assembler::testb(Register dst, int imm8) {
4688   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
4689   (void) prefix_and_encode(dst->encoding(), true);
4690   emit_arith_b(0xF6, 0xC0, dst, imm8);
4691 }
4692 
4693 void Assembler::testb(Address dst, int imm8) {
4694   InstructionMark im(this);
4695   prefix(dst);
4696   emit_int8((unsigned char)0xF6);
4697   emit_operand(rax, dst, 1);
4698   emit_int8(imm8);
4699 }
4700 
4701 void Assembler::testl(Register dst, int32_t imm32) {
4702   // not using emit_arith because test
4703   // doesn't support sign-extension of
4704   // 8bit operands
4705   int encode = dst->encoding();
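       // rax (encoding 0) has a shorter form: A9 id (TEST EAX, imm32)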
4706   if (encode == 0) {
4707     emit_int8((unsigned char)0xA9);
4708   } else {
4709     encode = prefix_and_encode(encode);
4710     emit_int8((unsigned char)0xF7);
4711     emit_int8((unsigned char)(0xC0 | encode));
4712   }
4713   emit_int32(imm32);
4714 }
4715 
4716 void Assembler::testl(Register dst, Register src) {
4717   (void) prefix_and_encode(dst->encoding(), src->encoding());
4718   emit_arith(0x85, 0xC0, dst, src);
4719 }
4720 
4721 void Assembler::testl(Register dst, Address src) {
4722   InstructionMark im(this);
4723   prefix(src, dst);
4724   emit_int8((unsigned char)0x85);
4725   emit_operand(dst, src);
4726 }
4727 
4728 void Assembler::tzcntl(Register dst, Register src) {
4729   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4730   emit_int8((unsigned char)0xF3);
4731   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4732   emit_int8(0x0F);
4733   emit_int8((unsigned char)0xBC);
4734   emit_int8((unsigned char)(0xC0 | encode));
4735 }
4736 
4737 void Assembler::tzcntq(Register dst, Register src) {
4738   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4739   emit_int8((unsigned char)0xF3);
4740   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4741   emit_int8(0x0F);
4742   emit_int8((unsigned char)0xBC);
4743   emit_int8((unsigned char)(0xC0 | encode));
4744 }
4745 
4746 void Assembler::ucomisd(XMMRegister dst, Address src) {
4747   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4748   InstructionMark im(this);
4749   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4750   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4751   attributes.set_rex_vex_w_reverted();
4752   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4753   emit_int8(0x2E);
4754   emit_operand(dst, src);
4755 }
4756 
4757 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
4758   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4759   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4760   attributes.set_rex_vex_w_reverted();
4761   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4762   emit_int8(0x2E);
4763   emit_int8((unsigned char)(0xC0 | encode));
4764 }
4765 
4766 void Assembler::ucomiss(XMMRegister dst, Address src) {
4767   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4768   InstructionMark im(this);
4769   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4770   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4771   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4772   emit_int8(0x2E);
4773   emit_operand(dst, src);
4774 }
4775 
4776 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
4777   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4778   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4779   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4780   emit_int8(0x2E);
4781   emit_int8((unsigned char)(0xC0 | encode));
4782 }
4783 
4784 void Assembler::xabort(int8_t imm8) {
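       // XABORT is C6 F8 ib; imm8 is reported in bits 31:24 of EAX on abort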
4785   emit_int8((unsigned char)0xC6);
4786   emit_int8((unsigned char)0xF8);
4787   emit_int8((unsigned char)(imm8 & 0xFF));
4788 }
4789 
4790 void Assembler::xaddb(Address dst, Register src) {
4791   InstructionMark im(this);
4792   prefix(dst, src, true);
4793   emit_int8(0x0F);
4794   emit_int8((unsigned char)0xC0);
4795   emit_operand(src, dst);
4796 }
4797 
4798 void Assembler::xaddw(Address dst, Register src) {
4799   InstructionMark im(this);
4800   emit_int8(0x66);
4801   prefix(dst, src);
4802   emit_int8(0x0F);
4803   emit_int8((unsigned char)0xC1);
4804   emit_operand(src, dst);
4805 }
4806 
4807 void Assembler::xaddl(Address dst, Register src) {
4808   InstructionMark im(this);
4809   prefix(dst, src);
4810   emit_int8(0x0F);
4811   emit_int8((unsigned char)0xC1);
4812   emit_operand(src, dst);
4813 }
4814 
4815 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
4816   InstructionMark im(this);
4817   relocate(rtype);
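       // XBEGIN rel32 is C7 F8 cd; the displacement is relative to the end of the instruction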
4818   if (abort.is_bound()) {
4819     address entry = target(abort);
4820     assert(entry != NULL, "abort entry NULL");
4821     intptr_t offset = entry - pc();
4822     emit_int8((unsigned char)0xC7);
4823     emit_int8((unsigned char)0xF8);
4824     emit_int32(offset - 6); // 2 opcode bytes + 4 displacement bytes
4825   } else {
4826     abort.add_patch_at(code(), locator());
4827     emit_int8((unsigned char)0xC7);
4828     emit_int8((unsigned char)0xF8);
4829     emit_int32(0);
4830   }
4831 }
4832 
4833 void Assembler::xchgb(Register dst, Address src) { // xchg
4834   InstructionMark im(this);
4835   prefix(src, dst, true);
4836   emit_int8((unsigned char)0x86);
4837   emit_operand(dst, src);
4838 }
4839 
4840 void Assembler::xchgw(Register dst, Address src) { // xchg
4841   InstructionMark im(this);
4842   emit_int8(0x66);
4843   prefix(src, dst);
4844   emit_int8((unsigned char)0x87);
4845   emit_operand(dst, src);
4846 }
4847 
4848 void Assembler::xchgl(Register dst, Address src) { // xchg
4849   InstructionMark im(this);
4850   prefix(src, dst);
4851   emit_int8((unsigned char)0x87);
4852   emit_operand(dst, src);
4853 }
4854 
4855 void Assembler::xchgl(Register dst, Register src) {
4856   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4857   emit_int8((unsigned char)0x87);
4858   emit_int8((unsigned char)(0xC0 | encode));
4859 }
4860 
4861 void Assembler::xend() {
4862   emit_int8((unsigned char)0x0F);
4863   emit_int8((unsigned char)0x01);
4864   emit_int8((unsigned char)0xD5);
4865 }
4866 
4867 void Assembler::xgetbv() {
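       // XGETBV (0F 01 D0) reads the XCR selected by ECX into EDX:EAX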
4868   emit_int8(0x0F);
4869   emit_int8(0x01);
4870   emit_int8((unsigned char)0xD0);
4871 }
4872 
4873 void Assembler::xorl(Register dst, int32_t imm32) {
4874   prefix(dst);
4875   emit_arith(0x81, 0xF0, dst, imm32);
4876 }
4877 
4878 void Assembler::xorl(Register dst, Address src) {
4879   InstructionMark im(this);
4880   prefix(src, dst);
4881   emit_int8(0x33);
4882   emit_operand(dst, src);
4883 }
4884 
4885 void Assembler::xorl(Register dst, Register src) {
4886   (void) prefix_and_encode(dst->encoding(), src->encoding());
4887   emit_arith(0x33, 0xC0, dst, src);
4888 }
4889 
4890 void Assembler::xorb(Register dst, Address src) {
4891   InstructionMark im(this);
4892   prefix(src, dst);
4893   emit_int8(0x32);
4894   emit_operand(dst, src);
4895 }
4896 
4897 // AVX 3-operand scalar floating-point arithmetic instructions
4898 
4899 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
4900   assert(VM_Version::supports_avx(), "");
4901   InstructionMark im(this);
4902   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4903   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4904   attributes.set_rex_vex_w_reverted();
4905   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4906   emit_int8(0x58);
4907   emit_operand(dst, src);
4908 }
4909 
4910 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4911   assert(VM_Version::supports_avx(), "");
4912   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4913   attributes.set_rex_vex_w_reverted();
4914   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4915   emit_int8(0x58);
4916   emit_int8((unsigned char)(0xC0 | encode));
4917 }
4918 
4919 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
4920   assert(VM_Version::supports_avx(), "");
4921   InstructionMark im(this);
4922   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4923   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4924   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4925   emit_int8(0x58);
4926   emit_operand(dst, src);
4927 }
4928 
4929 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4930   assert(VM_Version::supports_avx(), "");
4931   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4932   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4933   emit_int8(0x58);
4934   emit_int8((unsigned char)(0xC0 | encode));
4935 }
4936 
4937 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
4938   assert(VM_Version::supports_avx(), "");
4939   InstructionMark im(this);
4940   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4941   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4942   attributes.set_rex_vex_w_reverted();
4943   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4944   emit_int8(0x5E);
4945   emit_operand(dst, src);
4946 }
4947 
4948 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4949   assert(VM_Version::supports_avx(), "");
4950   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4951   attributes.set_rex_vex_w_reverted();
4952   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4953   emit_int8(0x5E);
4954   emit_int8((unsigned char)(0xC0 | encode));
4955 }
4956 
4957 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
4958   assert(VM_Version::supports_avx(), "");
4959   InstructionMark im(this);
4960   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4961   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4962   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4963   emit_int8(0x5E);
4964   emit_operand(dst, src);
4965 }
4966 
4967 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4968   assert(VM_Version::supports_avx(), "");
4969   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4970   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4971   emit_int8(0x5E);
4972   emit_int8((unsigned char)(0xC0 | encode));
4973 }
4974 
4975 void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
4976   assert(VM_Version::supports_fma(), "");
4977   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4978   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4979   emit_int8((unsigned char)0xB9);
4980   emit_int8((unsigned char)(0xC0 | encode));
4981 }
4982 
4983 void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
4984   assert(VM_Version::supports_fma(), "");
4985   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4986   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4987   emit_int8((unsigned char)0xB9);
4988   emit_int8((unsigned char)(0xC0 | encode));
4989 }
4990 
4991 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
4992   assert(VM_Version::supports_avx(), "");
4993   InstructionMark im(this);
4994   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4995   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4996   attributes.set_rex_vex_w_reverted();
4997   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4998   emit_int8(0x59);
4999   emit_operand(dst, src);
5000 }
5001 
5002 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5003   assert(VM_Version::supports_avx(), "");
5004   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5005   attributes.set_rex_vex_w_reverted();
5006   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5007   emit_int8(0x59);
5008   emit_int8((unsigned char)(0xC0 | encode));
5009 }
5010 
5011 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
5012   assert(VM_Version::supports_avx(), "");
5013   InstructionMark im(this);
5014   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5015   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5016   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5017   emit_int8(0x59);
5018   emit_operand(dst, src);
5019 }
5020 
5021 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5022   assert(VM_Version::supports_avx(), "");
5023   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5024   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5025   emit_int8(0x59);
5026   emit_int8((unsigned char)(0xC0 | encode));
5027 }
5028 
5029 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
5030   assert(VM_Version::supports_avx(), "");
5031   InstructionMark im(this);
5032   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5033   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5034   attributes.set_rex_vex_w_reverted();
5035   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5036   emit_int8(0x5C);
5037   emit_operand(dst, src);
5038 }
5039 
5040 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5041   assert(VM_Version::supports_avx(), "");
5042   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5043   attributes.set_rex_vex_w_reverted();
5044   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5045   emit_int8(0x5C);
5046   emit_int8((unsigned char)(0xC0 | encode));
5047 }
5048 
5049 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
5050   assert(VM_Version::supports_avx(), "");
5051   InstructionMark im(this);
5052   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5053   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5054   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5055   emit_int8(0x5C);
5056   emit_operand(dst, src);
5057 }
5058 
5059 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5060   assert(VM_Version::supports_avx(), "");
5061   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5062   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5063   emit_int8(0x5C);
5064   emit_int8((unsigned char)(0xC0 | encode));
5065 }
5066 
5067 //====================VECTOR ARITHMETIC=====================================
5068 
5069 // Floating-point vector arithmetic
5070 
5071 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
5072   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5073   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5074   attributes.set_rex_vex_w_reverted();
5075   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5076   emit_int8(0x58);
5077   emit_int8((unsigned char)(0xC0 | encode));
5078 }
5079 
5080 void Assembler::addpd(XMMRegister dst, Address src) {
5081   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5082   InstructionMark im(this);
5083   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5084   attributes.set_rex_vex_w_reverted();
5085   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5086   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5087   emit_int8(0x58);
5088   emit_operand(dst, src);
5089 }
5090 
5092 void Assembler::addps(XMMRegister dst, XMMRegister src) {
5093   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5094   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5095   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5096   emit_int8(0x58);
5097   emit_int8((unsigned char)(0xC0 | encode));
5098 }
5099 
5100 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5101   assert(VM_Version::supports_avx(), "");
5102   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5103   attributes.set_rex_vex_w_reverted();
5104   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5105   emit_int8(0x58);
5106   emit_int8((unsigned char)(0xC0 | encode));
5107 }
5108 
5109 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5110   assert(VM_Version::supports_avx(), "");
5111   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5112   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5113   emit_int8(0x58);
5114   emit_int8((unsigned char)(0xC0 | encode));
5115 }
5116 
5117 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5118   assert(VM_Version::supports_avx(), "");
5119   InstructionMark im(this);
5120   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5121   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5122   attributes.set_rex_vex_w_reverted();
5123   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5124   emit_int8(0x58);
5125   emit_operand(dst, src);
5126 }
5127 
5128 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5129   assert(VM_Version::supports_avx(), "");
5130   InstructionMark im(this);
5131   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5132   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5133   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5134   emit_int8(0x58);
5135   emit_operand(dst, src);
5136 }
5137 
5138 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
5139   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5140   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5141   attributes.set_rex_vex_w_reverted();
5142   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5143   emit_int8(0x5C);
5144   emit_int8((unsigned char)(0xC0 | encode));
5145 }
5146 
5147 void Assembler::subps(XMMRegister dst, XMMRegister src) {
5148   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5149   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5150   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5151   emit_int8(0x5C);
5152   emit_int8((unsigned char)(0xC0 | encode));
5153 }
5154 
5155 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5156   assert(VM_Version::supports_avx(), "");
5157   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5158   attributes.set_rex_vex_w_reverted();
5159   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5160   emit_int8(0x5C);
5161   emit_int8((unsigned char)(0xC0 | encode));
5162 }
5163 
5164 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5165   assert(VM_Version::supports_avx(), "");
5166   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5167   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5168   emit_int8(0x5C);
5169   emit_int8((unsigned char)(0xC0 | encode));
5170 }
5171 
5172 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5173   assert(VM_Version::supports_avx(), "");
5174   InstructionMark im(this);
5175   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5176   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5177   attributes.set_rex_vex_w_reverted();
5178   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5179   emit_int8(0x5C);
5180   emit_operand(dst, src);
5181 }
5182 
5183 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5184   assert(VM_Version::supports_avx(), "");
5185   InstructionMark im(this);
5186   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5187   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5188   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5189   emit_int8(0x5C);
5190   emit_operand(dst, src);
5191 }
5192 
5193 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
5194   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5195   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5196   attributes.set_rex_vex_w_reverted();
5197   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5198   emit_int8(0x59);
5199   emit_int8((unsigned char)(0xC0 | encode));
5200 }
5201 
5202 void Assembler::mulpd(XMMRegister dst, Address src) {
5203   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5204   InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5206   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5207   attributes.set_rex_vex_w_reverted();
5208   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5209   emit_int8(0x59);
5210   emit_operand(dst, src);
5211 }
5212 
5213 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
5215   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5216   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5217   emit_int8(0x59);
5218   emit_int8((unsigned char)(0xC0 | encode));
5219 }
5220 
5221 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5222   assert(VM_Version::supports_avx(), "");
5223   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5224   attributes.set_rex_vex_w_reverted();
5225   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5226   emit_int8(0x59);
5227   emit_int8((unsigned char)(0xC0 | encode));
5228 }
5229 
5230 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5231   assert(VM_Version::supports_avx(), "");
5232   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5233   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5234   emit_int8(0x59);
5235   emit_int8((unsigned char)(0xC0 | encode));
5236 }
5237 
5238 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5239   assert(VM_Version::supports_avx(), "");
5240   InstructionMark im(this);
5241   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5242   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5243   attributes.set_rex_vex_w_reverted();
5244   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5245   emit_int8(0x59);
5246   emit_operand(dst, src);
5247 }
5248 
5249 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5250   assert(VM_Version::supports_avx(), "");
5251   InstructionMark im(this);
5252   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5253   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5254   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5255   emit_int8(0x59);
5256   emit_operand(dst, src);
5257 }
5258 
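// Fused multiply-add: the "231" operand order computes dst = src1 * src2 + dst.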
5259 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5260   assert(VM_Version::supports_fma(), "");
5261   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5262   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5263   emit_int8((unsigned char)0xB8);
5264   emit_int8((unsigned char)(0xC0 | encode));
5265 }
5266 
5267 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5268   assert(VM_Version::supports_fma(), "");
5269   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5270   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5271   emit_int8((unsigned char)0xB8);
5272   emit_int8((unsigned char)(0xC0 | encode));
5273 }
5274 
5275 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5276   assert(VM_Version::supports_fma(), "");
5277   InstructionMark im(this);
5278   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5279   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5280   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5281   emit_int8((unsigned char)0xB8);
5282   emit_operand(dst, src2);
5283 }
5284 
5285 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5286   assert(VM_Version::supports_fma(), "");
5287   InstructionMark im(this);
5288   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5289   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5290   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5291   emit_int8((unsigned char)0xB8);
5292   emit_operand(dst, src2);
5293 }
5294 
5295 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
5296   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5297   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5298   attributes.set_rex_vex_w_reverted();
5299   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5300   emit_int8(0x5E);
5301   emit_int8((unsigned char)(0xC0 | encode));
5302 }
5303 
5304 void Assembler::divps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
5306   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5307   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5308   emit_int8(0x5E);
5309   emit_int8((unsigned char)(0xC0 | encode));
5310 }
5311 
5312 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5313   assert(VM_Version::supports_avx(), "");
5314   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5315   attributes.set_rex_vex_w_reverted();
5316   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5317   emit_int8(0x5E);
5318   emit_int8((unsigned char)(0xC0 | encode));
5319 }
5320 
5321 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5322   assert(VM_Version::supports_avx(), "");
5323   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5324   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5325   emit_int8(0x5E);
5326   emit_int8((unsigned char)(0xC0 | encode));
5327 }
5328 
5329 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5330   assert(VM_Version::supports_avx(), "");
5331   InstructionMark im(this);
5332   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5333   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5334   attributes.set_rex_vex_w_reverted();
5335   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5336   emit_int8(0x5E);
5337   emit_operand(dst, src);
5338 }
5339 
5340 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5341   assert(VM_Version::supports_avx(), "");
5342   InstructionMark im(this);
5343   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5344   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5345   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5346   emit_int8(0x5E);
5347   emit_operand(dst, src);
5348 }
5349 
5350 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
5351   assert(VM_Version::supports_avx(), "");
5352   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5353   attributes.set_rex_vex_w_reverted();
5354   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5355   emit_int8(0x51);
5356   emit_int8((unsigned char)(0xC0 | encode));
5357 }
5358 
5359 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
5360   assert(VM_Version::supports_avx(), "");
5361   InstructionMark im(this);
5362   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5363   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5364   attributes.set_rex_vex_w_reverted();
5365   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5366   emit_int8(0x51);
5367   emit_operand(dst, src);
5368 }
5369 
5370 void Assembler::vsqrtps(XMMRegister dst, XMMRegister src, int vector_len) {
5371   assert(VM_Version::supports_avx(), "");
5372   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5373   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5374   emit_int8(0x51);
5375   emit_int8((unsigned char)(0xC0 | encode));
5376 }
5377 
5378 void Assembler::vsqrtps(XMMRegister dst, Address src, int vector_len) {
5379   assert(VM_Version::supports_avx(), "");
5380   InstructionMark im(this);
5381   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5383   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5384   emit_int8(0x51);
5385   emit_operand(dst, src);
5386 }
5387 
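// Packed bitwise logic on floating-point values; typically used with sign-bit
// masks to implement abs and negate on floats and doubles.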
5388 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
5389   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5390   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5391   attributes.set_rex_vex_w_reverted();
5392   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5393   emit_int8(0x54);
5394   emit_int8((unsigned char)(0xC0 | encode));
5395 }
5396 
5397 void Assembler::andps(XMMRegister dst, XMMRegister src) {
5398   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5399   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5400   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5401   emit_int8(0x54);
5402   emit_int8((unsigned char)(0xC0 | encode));
5403 }
5404 
5405 void Assembler::andps(XMMRegister dst, Address src) {
5406   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5407   InstructionMark im(this);
5408   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5409   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5410   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5411   emit_int8(0x54);
5412   emit_operand(dst, src);
5413 }
5414 
5415 void Assembler::andpd(XMMRegister dst, Address src) {
5416   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5417   InstructionMark im(this);
5418   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5419   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5420   attributes.set_rex_vex_w_reverted();
5421   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5422   emit_int8(0x54);
5423   emit_operand(dst, src);
5424 }
5425 
5426 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5427   assert(VM_Version::supports_avx(), "");
5428   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5429   attributes.set_rex_vex_w_reverted();
5430   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5431   emit_int8(0x54);
5432   emit_int8((unsigned char)(0xC0 | encode));
5433 }
5434 
5435 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5436   assert(VM_Version::supports_avx(), "");
5437   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5438   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5439   emit_int8(0x54);
5440   emit_int8((unsigned char)(0xC0 | encode));
5441 }
5442 
5443 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5444   assert(VM_Version::supports_avx(), "");
5445   InstructionMark im(this);
5446   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5447   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5448   attributes.set_rex_vex_w_reverted();
5449   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5450   emit_int8(0x54);
5451   emit_operand(dst, src);
5452 }
5453 
5454 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5455   assert(VM_Version::supports_avx(), "");
5456   InstructionMark im(this);
5457   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5458   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5459   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5460   emit_int8(0x54);
5461   emit_operand(dst, src);
5462 }
5463 
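// unpckhpd / unpcklpd interleave 64-bit halves, low element first:
// the high form yields { dst.hi, src.hi }, the low form { dst.lo, src.lo }.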
5464 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
5465   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5466   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5467   attributes.set_rex_vex_w_reverted();
5468   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5469   emit_int8(0x15);
5470   emit_int8((unsigned char)(0xC0 | encode));
5471 }
5472 
5473 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
5474   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5475   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5476   attributes.set_rex_vex_w_reverted();
5477   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5478   emit_int8(0x14);
5479   emit_int8((unsigned char)(0xC0 | encode));
5480 }
5481 
5482 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
5483   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5484   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5485   attributes.set_rex_vex_w_reverted();
5486   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5487   emit_int8(0x57);
5488   emit_int8((unsigned char)(0xC0 | encode));
5489 }
5490 
5491 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
5492   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5493   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5494   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5495   emit_int8(0x57);
5496   emit_int8((unsigned char)(0xC0 | encode));
5497 }
5498 
5499 void Assembler::xorpd(XMMRegister dst, Address src) {
5500   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5501   InstructionMark im(this);
5502   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5503   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5504   attributes.set_rex_vex_w_reverted();
5505   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5506   emit_int8(0x57);
5507   emit_operand(dst, src);
5508 }
5509 
5510 void Assembler::xorps(XMMRegister dst, Address src) {
5511   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5512   InstructionMark im(this);
5513   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5514   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5515   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5516   emit_int8(0x57);
5517   emit_operand(dst, src);
5518 }
5519 
5520 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5521   assert(VM_Version::supports_avx(), "");
5522   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5523   attributes.set_rex_vex_w_reverted();
5524   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5525   emit_int8(0x57);
5526   emit_int8((unsigned char)(0xC0 | encode));
5527 }
5528 
5529 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5530   assert(VM_Version::supports_avx(), "");
5531   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5532   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5533   emit_int8(0x57);
5534   emit_int8((unsigned char)(0xC0 | encode));
5535 }
5536 
5537 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5538   assert(VM_Version::supports_avx(), "");
5539   InstructionMark im(this);
5540   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5541   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5542   attributes.set_rex_vex_w_reverted();
5543   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5544   emit_int8(0x57);
5545   emit_operand(dst, src);
5546 }
5547 
5548 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5549   assert(VM_Version::supports_avx(), "");
5550   InstructionMark im(this);
5551   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5552   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5553   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5554   emit_int8(0x57);
5555   emit_operand(dst, src);
5556 }
5557 
5558 // Integer vector arithmetic
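// Horizontal adds: each element of the result is the sum of a pair of adjacent
// elements drawn from the concatenated sources (per 128-bit lane in the
// 256-bit forms).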
5559 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == 0)) ||
         VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
5562   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5563   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5564   emit_int8(0x01);
5565   emit_int8((unsigned char)(0xC0 | encode));
5566 }
5567 
5568 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == 0)) ||
         VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
5571   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5572   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5573   emit_int8(0x02);
5574   emit_int8((unsigned char)(0xC0 | encode));
5575 }
5576 
5577 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
5578   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5579   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5580   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5581   emit_int8((unsigned char)0xFC);
5582   emit_int8((unsigned char)(0xC0 | encode));
5583 }
5584 
5585 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
5586   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5587   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5588   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5589   emit_int8((unsigned char)0xFD);
5590   emit_int8((unsigned char)(0xC0 | encode));
5591 }
5592 
5593 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
5594   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5595   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5596   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5597   emit_int8((unsigned char)0xFE);
5598   emit_int8((unsigned char)(0xC0 | encode));
5599 }
5600 
5601 void Assembler::paddd(XMMRegister dst, Address src) {
5602   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5603   InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5605   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5606   emit_int8((unsigned char)0xFE);
5607   emit_operand(dst, src);
5608 }
5609 
5610 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
5611   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5612   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5613   attributes.set_rex_vex_w_reverted();
5614   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5615   emit_int8((unsigned char)0xD4);
5616   emit_int8((unsigned char)(0xC0 | encode));
5617 }
5618 
5619 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
5621   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5622   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5623   emit_int8(0x01);
5624   emit_int8((unsigned char)(0xC0 | encode));
5625 }
5626 
5627 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
5629   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5630   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5631   emit_int8(0x02);
5632   emit_int8((unsigned char)(0xC0 | encode));
5633 }
5634 
5635 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5636   assert(UseAVX > 0, "requires some form of AVX");
5637   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5638   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5639   emit_int8((unsigned char)0xFC);
5640   emit_int8((unsigned char)(0xC0 | encode));
5641 }
5642 
5643 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5644   assert(UseAVX > 0, "requires some form of AVX");
5645   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5646   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5647   emit_int8((unsigned char)0xFD);
5648   emit_int8((unsigned char)(0xC0 | encode));
5649 }
5650 
5651 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5652   assert(UseAVX > 0, "requires some form of AVX");
5653   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5654   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5655   emit_int8((unsigned char)0xFE);
5656   emit_int8((unsigned char)(0xC0 | encode));
5657 }
5658 
5659 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5660   assert(UseAVX > 0, "requires some form of AVX");
5661   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5662   attributes.set_rex_vex_w_reverted();
5663   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5664   emit_int8((unsigned char)0xD4);
5665   emit_int8((unsigned char)(0xC0 | encode));
5666 }
5667 
5668 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5669   assert(UseAVX > 0, "requires some form of AVX");
5670   InstructionMark im(this);
5671   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5672   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5673   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5674   emit_int8((unsigned char)0xFC);
5675   emit_operand(dst, src);
5676 }
5677 
5678 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5679   assert(UseAVX > 0, "requires some form of AVX");
5680   InstructionMark im(this);
5681   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5682   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5683   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5684   emit_int8((unsigned char)0xFD);
5685   emit_operand(dst, src);
5686 }
5687 
5688 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5689   assert(UseAVX > 0, "requires some form of AVX");
5690   InstructionMark im(this);
5691   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5692   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5693   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5694   emit_int8((unsigned char)0xFE);
5695   emit_operand(dst, src);
5696 }
5697 
5698 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5699   assert(UseAVX > 0, "requires some form of AVX");
5700   InstructionMark im(this);
5701   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5702   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5703   attributes.set_rex_vex_w_reverted();
5704   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5705   emit_int8((unsigned char)0xD4);
5706   emit_operand(dst, src);
5707 }
5708 
5709 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
5710   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5711   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5712   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5713   emit_int8((unsigned char)0xF8);
5714   emit_int8((unsigned char)(0xC0 | encode));
5715 }
5716 
5717 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
5718   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5719   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5720   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5721   emit_int8((unsigned char)0xF9);
5722   emit_int8((unsigned char)(0xC0 | encode));
5723 }
5724 
void Assembler::psubd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5726   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5727   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5728   emit_int8((unsigned char)0xFA);
5729   emit_int8((unsigned char)(0xC0 | encode));
5730 }
5731 
5732 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
5733   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5734   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5735   attributes.set_rex_vex_w_reverted();
5736   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5737   emit_int8((unsigned char)0xFB);
5738   emit_int8((unsigned char)(0xC0 | encode));
5739 }
5740 
5741 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5742   assert(UseAVX > 0, "requires some form of AVX");
5743   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5744   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5745   emit_int8((unsigned char)0xF8);
5746   emit_int8((unsigned char)(0xC0 | encode));
5747 }
5748 
5749 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5750   assert(UseAVX > 0, "requires some form of AVX");
5751   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5752   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5753   emit_int8((unsigned char)0xF9);
5754   emit_int8((unsigned char)(0xC0 | encode));
5755 }
5756 
5757 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5758   assert(UseAVX > 0, "requires some form of AVX");
5759   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5760   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5761   emit_int8((unsigned char)0xFA);
5762   emit_int8((unsigned char)(0xC0 | encode));
5763 }
5764 
5765 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5766   assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5768   attributes.set_rex_vex_w_reverted();
5769   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5770   emit_int8((unsigned char)0xFB);
5771   emit_int8((unsigned char)(0xC0 | encode));
5772 }
5773 
5774 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5775   assert(UseAVX > 0, "requires some form of AVX");
5776   InstructionMark im(this);
5777   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5778   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5779   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5780   emit_int8((unsigned char)0xF8);
5781   emit_operand(dst, src);
5782 }
5783 
5784 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5785   assert(UseAVX > 0, "requires some form of AVX");
5786   InstructionMark im(this);
5787   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5788   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5789   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5790   emit_int8((unsigned char)0xF9);
5791   emit_operand(dst, src);
5792 }
5793 
5794 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5795   assert(UseAVX > 0, "requires some form of AVX");
5796   InstructionMark im(this);
5797   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5798   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5799   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5800   emit_int8((unsigned char)0xFA);
5801   emit_operand(dst, src);
5802 }
5803 
5804 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5805   assert(UseAVX > 0, "requires some form of AVX");
5806   InstructionMark im(this);
5807   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5808   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5809   attributes.set_rex_vex_w_reverted();
5810   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5811   emit_int8((unsigned char)0xFB);
5812   emit_operand(dst, src);
5813 }
5814 
5815 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
5816   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5817   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5818   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5819   emit_int8((unsigned char)0xD5);
5820   emit_int8((unsigned char)(0xC0 | encode));
5821 }
5822 
5823 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
5824   assert(VM_Version::supports_sse4_1(), "");
5825   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5826   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5827   emit_int8(0x40);
5828   emit_int8((unsigned char)(0xC0 | encode));
5829 }
5830 
5831 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5832   assert(UseAVX > 0, "requires some form of AVX");
5833   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5834   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5835   emit_int8((unsigned char)0xD5);
5836   emit_int8((unsigned char)(0xC0 | encode));
5837 }
5838 
5839 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5840   assert(UseAVX > 0, "requires some form of AVX");
5841   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5842   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5843   emit_int8(0x40);
5844   emit_int8((unsigned char)(0xC0 | encode));
5845 }
5846 
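// Packed 64x64 -> 64-bit multiply. VPMULLQ exists only in AVX-512DQ, hence the
// EVEX-only encoding below.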
5847 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5848   assert(UseAVX > 2, "requires some form of EVEX");
5849   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5850   attributes.set_is_evex_instruction();
5851   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5852   emit_int8(0x40);
5853   emit_int8((unsigned char)(0xC0 | encode));
5854 }
5855 
5856 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5857   assert(UseAVX > 0, "requires some form of AVX");
5858   InstructionMark im(this);
5859   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5860   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5861   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5862   emit_int8((unsigned char)0xD5);
5863   emit_operand(dst, src);
5864 }
5865 
5866 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5867   assert(UseAVX > 0, "requires some form of AVX");
5868   InstructionMark im(this);
5869   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5870   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5871   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5872   emit_int8(0x40);
5873   emit_operand(dst, src);
5874 }
5875 
5876 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5877   assert(UseAVX > 2, "requires some form of EVEX");
5878   InstructionMark im(this);
5879   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5880   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5881   attributes.set_is_evex_instruction();
5882   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5883   emit_int8(0x40);
5884   emit_operand(dst, src);
5885 }
5886 
// Shift packed integers left by the specified number of bits.
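// For the immediate-count forms in this and the two following shift sections,
// the fixed XMM register passed as the first argument to the prefix encoder
// (xmm2, xmm4 or xmm6) is not a real operand: it supplies the ModRM reg field,
// i.e. the /digit opcode extension. As a worked example (assuming the legacy,
// non-VEX encoding path), psllw(xmm1, 3) emits 66 0F 71 F1 03, where
// 0xF1 == 0xC0 | (/*reg*/ 6 << 3) | (/*rm*/ 1). The register-count forms
// (0xF1/0xD1/0xE1 and friends) instead shift every element by the count held
// in the low 64 bits of the shift register.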
5888 void Assembler::psllw(XMMRegister dst, int shift) {
5889   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5890   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5891   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5892   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5893   emit_int8(0x71);
5894   emit_int8((unsigned char)(0xC0 | encode));
5895   emit_int8(shift & 0xFF);
5896 }
5897 
5898 void Assembler::pslld(XMMRegister dst, int shift) {
5899   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5900   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5901   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5902   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5903   emit_int8(0x72);
5904   emit_int8((unsigned char)(0xC0 | encode));
5905   emit_int8(shift & 0xFF);
5906 }
5907 
5908 void Assembler::psllq(XMMRegister dst, int shift) {
5909   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5910   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5911   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5912   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5913   emit_int8(0x73);
5914   emit_int8((unsigned char)(0xC0 | encode));
5915   emit_int8(shift & 0xFF);
5916 }
5917 
5918 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
5919   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5920   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5921   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5922   emit_int8((unsigned char)0xF1);
5923   emit_int8((unsigned char)(0xC0 | encode));
5924 }
5925 
5926 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
5927   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5928   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5929   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5930   emit_int8((unsigned char)0xF2);
5931   emit_int8((unsigned char)(0xC0 | encode));
5932 }
5933 
5934 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
5935   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5936   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5937   attributes.set_rex_vex_w_reverted();
5938   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5939   emit_int8((unsigned char)0xF3);
5940   emit_int8((unsigned char)(0xC0 | encode));
5941 }
5942 
5943 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5944   assert(UseAVX > 0, "requires some form of AVX");
5945   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5946   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5947   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5948   emit_int8(0x71);
5949   emit_int8((unsigned char)(0xC0 | encode));
5950   emit_int8(shift & 0xFF);
5951 }
5952 
5953 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5954   assert(UseAVX > 0, "requires some form of AVX");
5956   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5957   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5958   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5959   emit_int8(0x72);
5960   emit_int8((unsigned char)(0xC0 | encode));
5961   emit_int8(shift & 0xFF);
5962 }
5963 
5964 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5965   assert(UseAVX > 0, "requires some form of AVX");
5966   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5967   attributes.set_rex_vex_w_reverted();
5968   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5969   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5970   emit_int8(0x73);
5971   emit_int8((unsigned char)(0xC0 | encode));
5972   emit_int8(shift & 0xFF);
5973 }
5974 
5975 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5976   assert(UseAVX > 0, "requires some form of AVX");
5977   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5978   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5979   emit_int8((unsigned char)0xF1);
5980   emit_int8((unsigned char)(0xC0 | encode));
5981 }
5982 
5983 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5984   assert(UseAVX > 0, "requires some form of AVX");
5985   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5986   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5987   emit_int8((unsigned char)0xF2);
5988   emit_int8((unsigned char)(0xC0 | encode));
5989 }
5990 
5991 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5992   assert(UseAVX > 0, "requires some form of AVX");
5993   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5994   attributes.set_rex_vex_w_reverted();
5995   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5996   emit_int8((unsigned char)0xF3);
5997   emit_int8((unsigned char)(0xC0 | encode));
5998 }
5999 
// Shift packed integers logically right by the specified number of bits.
6001 void Assembler::psrlw(XMMRegister dst, int shift) {
6002   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6003   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6004   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
6005   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6006   emit_int8(0x71);
6007   emit_int8((unsigned char)(0xC0 | encode));
6008   emit_int8(shift & 0xFF);
6009 }
6010 
6011 void Assembler::psrld(XMMRegister dst, int shift) {
6012   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6013   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6014   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
6015   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6016   emit_int8(0x72);
6017   emit_int8((unsigned char)(0xC0 | encode));
6018   emit_int8(shift & 0xFF);
6019 }
6020 
6021 void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse this with the SSE2 psrldq instruction, which shifts the
  // whole 128-bit value in the register right by a number of bytes.
6024   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6025   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6026   attributes.set_rex_vex_w_reverted();
6027   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
6028   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6029   emit_int8(0x73);
6030   emit_int8((unsigned char)(0xC0 | encode));
6031   emit_int8(shift & 0xFF);
6032 }
6033 
6034 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
6035   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6036   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6037   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6038   emit_int8((unsigned char)0xD1);
6039   emit_int8((unsigned char)(0xC0 | encode));
6040 }
6041 
6042 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
6043   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6044   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6045   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6046   emit_int8((unsigned char)0xD2);
6047   emit_int8((unsigned char)(0xC0 | encode));
6048 }
6049 
6050 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
6051   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6052   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6053   attributes.set_rex_vex_w_reverted();
6054   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6055   emit_int8((unsigned char)0xD3);
6056   emit_int8((unsigned char)(0xC0 | encode));
6057 }
6058 
6059 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6060   assert(UseAVX > 0, "requires some form of AVX");
6061   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6062   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
6063   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6064   emit_int8(0x71);
6065   emit_int8((unsigned char)(0xC0 | encode));
6066   emit_int8(shift & 0xFF);
6067 }
6068 
6069 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6070   assert(UseAVX > 0, "requires some form of AVX");
6071   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6072   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
6073   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6074   emit_int8(0x72);
6075   emit_int8((unsigned char)(0xC0 | encode));
6076   emit_int8(shift & 0xFF);
6077 }
6078 
6079 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6080   assert(UseAVX > 0, "requires some form of AVX");
6081   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6082   attributes.set_rex_vex_w_reverted();
6083   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
6084   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6085   emit_int8(0x73);
6086   emit_int8((unsigned char)(0xC0 | encode));
6087   emit_int8(shift & 0xFF);
6088 }
6089 
6090 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6091   assert(UseAVX > 0, "requires some form of AVX");
6092   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6093   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6094   emit_int8((unsigned char)0xD1);
6095   emit_int8((unsigned char)(0xC0 | encode));
6096 }
6097 
6098 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6099   assert(UseAVX > 0, "requires some form of AVX");
6100   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6101   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6102   emit_int8((unsigned char)0xD2);
6103   emit_int8((unsigned char)(0xC0 | encode));
6104 }
6105 
6106 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6107   assert(UseAVX > 0, "requires some form of AVX");
6108   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6109   attributes.set_rex_vex_w_reverted();
6110   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6111   emit_int8((unsigned char)0xD3);
6112   emit_int8((unsigned char)(0xC0 | encode));
6113 }
6114 
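// Variable-count word shifts (AVX-512BW): each word element of nds is shifted
// by the count held in the corresponding word element of src.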
6115 void Assembler::evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6116   assert(VM_Version::supports_avx512bw(), "");
6117   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6118   attributes.set_is_evex_instruction();
6119   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6120   emit_int8(0x10);
6121   emit_int8((unsigned char)(0xC0 | encode));
6122 }
6123 
6124 void Assembler::evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6125   assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6127   attributes.set_is_evex_instruction();
6128   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6129   emit_int8(0x12);
6130   emit_int8((unsigned char)(0xC0 | encode));
6131 }
6132 
// Shift packed integers arithmetically right by the specified number of bits.
6134 void Assembler::psraw(XMMRegister dst, int shift) {
6135   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6136   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6137   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6138   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6139   emit_int8(0x71);
6140   emit_int8((unsigned char)(0xC0 | encode));
6141   emit_int8(shift & 0xFF);
6142 }
6143 
6144 void Assembler::psrad(XMMRegister dst, int shift) {
6145   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6146   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6147   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
6148   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6149   emit_int8(0x72);
6150   emit_int8((unsigned char)(0xC0 | encode));
6151   emit_int8(shift & 0xFF);
6152 }
6153 
6154 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
6155   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6156   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6157   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6158   emit_int8((unsigned char)0xE1);
6159   emit_int8((unsigned char)(0xC0 | encode));
6160 }
6161 
6162 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
6163   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6164   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6165   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6166   emit_int8((unsigned char)0xE2);
6167   emit_int8((unsigned char)(0xC0 | encode));
6168 }
6169 
6170 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6171   assert(UseAVX > 0, "requires some form of AVX");
6172   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6173   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6174   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6175   emit_int8(0x71);
6176   emit_int8((unsigned char)(0xC0 | encode));
6177   emit_int8(shift & 0xFF);
6178 }
6179 
6180 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6181   assert(UseAVX > 0, "requires some form of AVX");
6182   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
6184   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6185   emit_int8(0x72);
6186   emit_int8((unsigned char)(0xC0 | encode));
6187   emit_int8(shift & 0xFF);
6188 }
6189 
6190 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6191   assert(UseAVX > 0, "requires some form of AVX");
6192   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6193   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6194   emit_int8((unsigned char)0xE1);
6195   emit_int8((unsigned char)(0xC0 | encode));
6196 }
6197 
6198 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6199   assert(UseAVX > 0, "requires some form of AVX");
6200   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6201   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6202   emit_int8((unsigned char)0xE2);
6203   emit_int8((unsigned char)(0xC0 | encode));
6204 }
6205 
6206 
6207 // logical operations packed integers
6208 void Assembler::pand(XMMRegister dst, XMMRegister src) {
6209   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6210   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6211   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6212   emit_int8((unsigned char)0xDB);
6213   emit_int8((unsigned char)(0xC0 | encode));
6214 }
6215 
6216 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6217   assert(UseAVX > 0, "requires some form of AVX");
6218   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6219   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6220   emit_int8((unsigned char)0xDB);
6221   emit_int8((unsigned char)(0xC0 | encode));
6222 }
6223 
6224 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6225   assert(UseAVX > 0, "requires some form of AVX");
6226   InstructionMark im(this);
6227   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6228   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6229   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6230   emit_int8((unsigned char)0xDB);
6231   emit_operand(dst, src);
6232 }
6233 
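// vpandq is the EVEX.W=1 flavor of the same 0xDB opcode used by vpand above;
// the W bit selects the quadword element size (vporq and evpxorq below follow
// the same pattern for 0xEB and 0xEF).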
6234 void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6235   assert(VM_Version::supports_evex(), "");
6236   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6237   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6238   emit_int8((unsigned char)0xDB);
6239   emit_int8((unsigned char)(0xC0 | encode));
6240 }
6241 
6242 
6243 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
6244   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6245   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6246   attributes.set_rex_vex_w_reverted();
6247   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6248   emit_int8((unsigned char)0xDF);
6249   emit_int8((unsigned char)(0xC0 | encode));
6250 }
6251 
6252 void Assembler::por(XMMRegister dst, XMMRegister src) {
6253   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6254   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6255   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6256   emit_int8((unsigned char)0xEB);
6257   emit_int8((unsigned char)(0xC0 | encode));
6258 }
6259 
6260 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6261   assert(UseAVX > 0, "requires some form of AVX");
6262   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6263   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6264   emit_int8((unsigned char)0xEB);
6265   emit_int8((unsigned char)(0xC0 | encode));
6266 }
6267 
6268 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6269   assert(UseAVX > 0, "requires some form of AVX");
6270   InstructionMark im(this);
6271   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6272   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6273   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6274   emit_int8((unsigned char)0xEB);
6275   emit_operand(dst, src);
6276 }
6277 
6278 void Assembler::vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6279   assert(VM_Version::supports_evex(), "");
6280   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6281   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6282   emit_int8((unsigned char)0xEB);
6283   emit_int8((unsigned char)(0xC0 | encode));
6284 }
6285 
6286 
6287 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
6288   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6289   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6290   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6291   emit_int8((unsigned char)0xEF);
6292   emit_int8((unsigned char)(0xC0 | encode));
6293 }
6294 
6295 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6296   assert(UseAVX > 0, "requires some form of AVX");
6297   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6298   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6299   emit_int8((unsigned char)0xEF);
6300   emit_int8((unsigned char)(0xC0 | encode));
6301 }
6302 
6303 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6304   assert(UseAVX > 0, "requires some form of AVX");
6305   InstructionMark im(this);
6306   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6307   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6308   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6309   emit_int8((unsigned char)0xEF);
6310   emit_operand(dst, src);
6311 }
6312 
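// Usage sketch: evpxorq(z, z, z, AVX_512bit) is the idiomatic way to zero a
// 512-bit register, since x XOR x == 0 for any operand.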
6313 void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6314   assert(VM_Version::supports_evex(), "requires EVEX support");
6315   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6316   attributes.set_is_evex_instruction();
6317   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6318   emit_int8((unsigned char)0xEF);
6319   emit_int8((unsigned char)(0xC0 | encode));
6320 }
6321 
6322 void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6323   assert(VM_Version::supports_evex(), "requires EVEX support");
6324   assert(dst != xnoreg, "sanity");
6325   InstructionMark im(this);
6326   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6327   attributes.set_is_evex_instruction();
6328   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
6329   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6330   emit_int8((unsigned char)0xEF);
6331   emit_operand(dst, src);
6332 }
6333 
6334 
6335 // vinserti forms
6336 
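// Note: the vinserti*/vextracti* and vinsertf*/vextractf* forms move the same
// bits; the i/f distinction only selects the integer vs floating-point
// execution domain implied by the opcode.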
6337 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6338   assert(VM_Version::supports_avx2(), "");
6339   assert(imm8 <= 0x01, "imm8: %u", imm8);
6340   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6341   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6342   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6343   emit_int8(0x38);
6344   emit_int8((unsigned char)(0xC0 | encode));
6345   // 0x00 - insert into lower 128 bits
6346   // 0x01 - insert into upper 128 bits
6347   emit_int8(imm8 & 0x01);
6348 }
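// Usage sketch: vinserti128(dst, nds, src, 0x01) copies nds into dst and then
// replaces dst's upper 128 bits with src.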
6349 
6350 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6351   assert(VM_Version::supports_avx2(), "");
6352   assert(dst != xnoreg, "sanity");
6353   assert(imm8 <= 0x01, "imm8: %u", imm8);
6354   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6355   InstructionMark im(this);
6356   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6357   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6358   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6359   emit_int8(0x38);
6360   emit_operand(dst, src);
6361   // 0x00 - insert into lower 128 bits
6362   // 0x01 - insert into upper 128 bits
6363   emit_int8(imm8 & 0x01);
6364 }
6365 
6366 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6367   assert(VM_Version::supports_evex(), "");
6368   assert(imm8 <= 0x03, "imm8: %u", imm8);
6369   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6370   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6371   emit_int8(0x38);
6372   emit_int8((unsigned char)(0xC0 | encode));
6373   // 0x00 - insert into q0 128 bits (0..127)
6374   // 0x01 - insert into q1 128 bits (128..255)
6375   // 0x02 - insert into q2 128 bits (256..383)
6376   // 0x03 - insert into q3 128 bits (384..511)
6377   emit_int8(imm8 & 0x03);
6378 }
6379 
6380 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6381   assert(VM_Version::supports_avx(), "");
6382   assert(dst != xnoreg, "sanity");
6383   assert(imm8 <= 0x03, "imm8: %u", imm8);
6384   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6385   InstructionMark im(this);
6386   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6387   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6388   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
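  // NB: 0x18 is the vinsertf-form opcode; it moves the same 128 bits as the
  // integer form (0x38) and also assembles as vinsertf128 on pre-EVEX targets.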
6389   emit_int8(0x18);
6390   emit_operand(dst, src);
6391   // 0x00 - insert into q0 128 bits (0..127)
6392   // 0x01 - insert into q1 128 bits (128..255)
6393   // 0x02 - insert into q2 128 bits (256..383)
6394   // 0x03 - insert into q3 128 bits (384..511)
6395   emit_int8(imm8 & 0x03);
6396 }
6397 
6398 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6399   assert(VM_Version::supports_evex(), "");
6400   assert(imm8 <= 0x01, "imm8: %u", imm8);
6401   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6402   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x3A);
6404   emit_int8((unsigned char)(0xC0 | encode));
6405   // 0x00 - insert into lower 256 bits
6406   // 0x01 - insert into upper 256 bits
6407   emit_int8(imm8 & 0x01);
6408 }
6409 
6410 
6411 // vinsertf forms
6412 
6413 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6414   assert(VM_Version::supports_avx(), "");
6415   assert(imm8 <= 0x01, "imm8: %u", imm8);
6416   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6417   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6418   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6419   emit_int8(0x18);
6420   emit_int8((unsigned char)(0xC0 | encode));
6421   // 0x00 - insert into lower 128 bits
6422   // 0x01 - insert into upper 128 bits
6423   emit_int8(imm8 & 0x01);
6424 }
6425 
6426 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6427   assert(VM_Version::supports_avx(), "");
6428   assert(dst != xnoreg, "sanity");
6429   assert(imm8 <= 0x01, "imm8: %u", imm8);
6430   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6431   InstructionMark im(this);
6432   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6433   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6434   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6435   emit_int8(0x18);
6436   emit_operand(dst, src);
6437   // 0x00 - insert into lower 128 bits
6438   // 0x01 - insert into upper 128 bits
6439   emit_int8(imm8 & 0x01);
6440 }
6441 
6442 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6443   assert(VM_Version::supports_evex(), "");
6444   assert(imm8 <= 0x03, "imm8: %u", imm8);
6445   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6446   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6447   emit_int8(0x18);
6448   emit_int8((unsigned char)(0xC0 | encode));
6449   // 0x00 - insert into q0 128 bits (0..127)
6450   // 0x01 - insert into q1 128 bits (128..255)
6451   // 0x02 - insert into q2 128 bits (256..383)
6452   // 0x03 - insert into q3 128 bits (384..511)
6453   emit_int8(imm8 & 0x03);
6454 }
6455 
6456 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6457   assert(VM_Version::supports_avx(), "");
6458   assert(dst != xnoreg, "sanity");
6459   assert(imm8 <= 0x03, "imm8: %u", imm8);
6460   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6461   InstructionMark im(this);
6462   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6463   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6464   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6465   emit_int8(0x18);
6466   emit_operand(dst, src);
6467   // 0x00 - insert into q0 128 bits (0..127)
6468   // 0x01 - insert into q1 128 bits (128..255)
6469   // 0x02 - insert into q2 128 bits (256..383)
6470   // 0x03 - insert into q3 128 bits (384..511)
6471   emit_int8(imm8 & 0x03);
6472 }
6473 
6474 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6475   assert(VM_Version::supports_evex(), "");
6476   assert(imm8 <= 0x01, "imm8: %u", imm8);
6477   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6478   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6479   emit_int8(0x1A);
6480   emit_int8((unsigned char)(0xC0 | encode));
6481   // 0x00 - insert into lower 256 bits
6482   // 0x01 - insert into upper 256 bits
6483   emit_int8(imm8 & 0x01);
6484 }
6485 
6486 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6487   assert(VM_Version::supports_evex(), "");
6488   assert(dst != xnoreg, "sanity");
6489   assert(imm8 <= 0x01, "imm8: %u", imm8);
6490   InstructionMark im(this);
6491   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6492   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
6493   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6494   emit_int8(0x1A);
6495   emit_operand(dst, src);
6496   // 0x00 - insert into lower 256 bits
6497   // 0x01 - insert into upper 256 bits
6498   emit_int8(imm8 & 0x01);
6499 }
6500 
6501 
6502 // vextracti forms
6503 
6504 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx2(), "");
6506   assert(imm8 <= 0x01, "imm8: %u", imm8);
6507   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6508   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6509   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6510   emit_int8(0x39);
6511   emit_int8((unsigned char)(0xC0 | encode));
6512   // 0x00 - extract from lower 128 bits
6513   // 0x01 - extract from upper 128 bits
6514   emit_int8(imm8 & 0x01);
6515 }
6516 
6517 void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
6518   assert(VM_Version::supports_avx2(), "");
6519   assert(src != xnoreg, "sanity");
6520   assert(imm8 <= 0x01, "imm8: %u", imm8);
6521   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6522   InstructionMark im(this);
6523   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6524   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6525   attributes.reset_is_clear_context();
6526   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6527   emit_int8(0x39);
6528   emit_operand(src, dst);
6529   // 0x00 - extract from lower 128 bits
6530   // 0x01 - extract from upper 128 bits
6531   emit_int8(imm8 & 0x01);
6532 }
6533 
6534 void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6535   assert(VM_Version::supports_avx(), "");
6536   assert(imm8 <= 0x03, "imm8: %u", imm8);
6537   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6538   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6539   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6540   emit_int8(0x39);
6541   emit_int8((unsigned char)(0xC0 | encode));
6542   // 0x00 - extract from bits 127:0
6543   // 0x01 - extract from bits 255:128
6544   // 0x02 - extract from bits 383:256
6545   // 0x03 - extract from bits 511:384
6546   emit_int8(imm8 & 0x03);
6547 }
6548 
6549 void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
6550   assert(VM_Version::supports_evex(), "");
6551   assert(src != xnoreg, "sanity");
6552   assert(imm8 <= 0x03, "imm8: %u", imm8);
6553   InstructionMark im(this);
6554   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6555   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6556   attributes.reset_is_clear_context();
6557   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6558   emit_int8(0x39);
6559   emit_operand(src, dst);
6560   // 0x00 - extract from bits 127:0
6561   // 0x01 - extract from bits 255:128
6562   // 0x02 - extract from bits 383:256
6563   // 0x03 - extract from bits 511:384
6564   emit_int8(imm8 & 0x03);
6565 }
6566 
6567 void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6568   assert(VM_Version::supports_avx512dq(), "");
6569   assert(imm8 <= 0x03, "imm8: %u", imm8);
6570   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6571   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6572   emit_int8(0x39);
6573   emit_int8((unsigned char)(0xC0 | encode));
6574   // 0x00 - extract from bits 127:0
6575   // 0x01 - extract from bits 255:128
6576   // 0x02 - extract from bits 383:256
6577   // 0x03 - extract from bits 511:384
6578   emit_int8(imm8 & 0x03);
6579 }
6580 
6581 void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6582   assert(VM_Version::supports_evex(), "");
6583   assert(imm8 <= 0x01, "imm8: %u", imm8);
6584   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6585   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6586   emit_int8(0x3B);
6587   emit_int8((unsigned char)(0xC0 | encode));
6588   // 0x00 - extract from lower 256 bits
6589   // 0x01 - extract from upper 256 bits
6590   emit_int8(imm8 & 0x01);
6591 }
6592 
6593 
6594 // vextractf forms
6595 
6596 void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6597   assert(VM_Version::supports_avx(), "");
6598   assert(imm8 <= 0x01, "imm8: %u", imm8);
6599   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6600   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6601   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6602   emit_int8(0x19);
6603   emit_int8((unsigned char)(0xC0 | encode));
6604   // 0x00 - extract from lower 128 bits
6605   // 0x01 - extract from upper 128 bits
6606   emit_int8(imm8 & 0x01);
6607 }
6608 
6609 void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
6610   assert(VM_Version::supports_avx(), "");
6611   assert(src != xnoreg, "sanity");
6612   assert(imm8 <= 0x01, "imm8: %u", imm8);
6613   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6614   InstructionMark im(this);
6615   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6616   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6617   attributes.reset_is_clear_context();
6618   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6619   emit_int8(0x19);
6620   emit_operand(src, dst);
6621   // 0x00 - extract from lower 128 bits
6622   // 0x01 - extract from upper 128 bits
6623   emit_int8(imm8 & 0x01);
6624 }
6625 
6626 void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6627   assert(VM_Version::supports_avx(), "");
6628   assert(imm8 <= 0x03, "imm8: %u", imm8);
6629   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6630   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6631   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6632   emit_int8(0x19);
6633   emit_int8((unsigned char)(0xC0 | encode));
6634   // 0x00 - extract from bits 127:0
6635   // 0x01 - extract from bits 255:128
6636   // 0x02 - extract from bits 383:256
6637   // 0x03 - extract from bits 511:384
6638   emit_int8(imm8 & 0x03);
6639 }
6640 
6641 void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
6642   assert(VM_Version::supports_evex(), "");
6643   assert(src != xnoreg, "sanity");
6644   assert(imm8 <= 0x03, "imm8: %u", imm8);
6645   InstructionMark im(this);
6646   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6647   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6648   attributes.reset_is_clear_context();
6649   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6650   emit_int8(0x19);
6651   emit_operand(src, dst);
6652   // 0x00 - extract from bits 127:0
6653   // 0x01 - extract from bits 255:128
6654   // 0x02 - extract from bits 383:256
6655   // 0x03 - extract from bits 511:384
6656   emit_int8(imm8 & 0x03);
6657 }
6658 
6659 void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6660   assert(VM_Version::supports_avx512dq(), "");
6661   assert(imm8 <= 0x03, "imm8: %u", imm8);
6662   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6663   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6664   emit_int8(0x19);
6665   emit_int8((unsigned char)(0xC0 | encode));
6666   // 0x00 - extract from bits 127:0
6667   // 0x01 - extract from bits 255:128
6668   // 0x02 - extract from bits 383:256
6669   // 0x03 - extract from bits 511:384
6670   emit_int8(imm8 & 0x03);
6671 }
6672 
6673 void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6674   assert(VM_Version::supports_evex(), "");
6675   assert(imm8 <= 0x01, "imm8: %u", imm8);
6676   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6677   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6678   emit_int8(0x1B);
6679   emit_int8((unsigned char)(0xC0 | encode));
6680   // 0x00 - extract from lower 256 bits
6681   // 0x01 - extract from upper 256 bits
6682   emit_int8(imm8 & 0x01);
6683 }
6684 
6685 void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
6686   assert(VM_Version::supports_evex(), "");
6687   assert(src != xnoreg, "sanity");
6688   assert(imm8 <= 0x01, "imm8: %u", imm8);
6689   InstructionMark im(this);
6690   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
6692   attributes.reset_is_clear_context();
6693   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6694   emit_int8(0x1B);
6695   emit_operand(src, dst);
6696   // 0x00 - extract from lower 256 bits
6697   // 0x01 - extract from upper 256 bits
6698   emit_int8(imm8 & 0x01);
6699 }
6700 
6701 
6702 // legacy word/dword replicate
6703 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
6704   assert(VM_Version::supports_avx2(), "");
6705   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6706   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6707   emit_int8(0x79);
6708   emit_int8((unsigned char)(0xC0 | encode));
6709 }
6710 
6711 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
6712   assert(VM_Version::supports_avx2(), "");
6713   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6714   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6715   emit_int8(0x58);
6716   emit_int8((unsigned char)(0xC0 | encode));
6717 }
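// Usage sketch: vpbroadcastd(dst, src) replicates src[31:0] into all eight
// dword lanes of the 256-bit dst.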
6718 
6719 
6720 // xmm/mem sourced byte/word/dword/qword replicate
6721 
6722 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6723 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
6724   assert(VM_Version::supports_evex(), "");
6725   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6726   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6727   emit_int8(0x78);
6728   emit_int8((unsigned char)(0xC0 | encode));
6729 }
6730 
6731 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
6732   assert(VM_Version::supports_evex(), "");
6733   assert(dst != xnoreg, "sanity");
6734   InstructionMark im(this);
6735   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6736   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
6737   // swap src<->dst for encoding
6738   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6739   emit_int8(0x78);
6740   emit_operand(dst, src);
6741 }
6742 
6743 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6744 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
6745   assert(VM_Version::supports_evex(), "");
6746   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6747   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6748   emit_int8(0x79);
6749   emit_int8((unsigned char)(0xC0 | encode));
6750 }
6751 
6752 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
6753   assert(VM_Version::supports_evex(), "");
6754   assert(dst != xnoreg, "sanity");
6755   InstructionMark im(this);
6756   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6757   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
6758   // swap src<->dst for encoding
6759   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6760   emit_int8(0x79);
6761   emit_operand(dst, src);
6762 }
6763 
6764 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6765 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
6766   assert(VM_Version::supports_evex(), "");
6767   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6768   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6769   emit_int8(0x58);
6770   emit_int8((unsigned char)(0xC0 | encode));
6771 }
6772 
6773 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
6774   assert(VM_Version::supports_evex(), "");
6775   assert(dst != xnoreg, "sanity");
6776   InstructionMark im(this);
6777   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6778   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6779   // swap src<->dst for encoding
6780   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6781   emit_int8(0x58);
6782   emit_operand(dst, src);
6783 }
6784 
6785 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6786 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
6787   assert(VM_Version::supports_evex(), "");
6788   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6789   attributes.set_rex_vex_w_reverted();
6790   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6791   emit_int8(0x59);
6792   emit_int8((unsigned char)(0xC0 | encode));
6793 }
6794 
6795 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
6796   assert(VM_Version::supports_evex(), "");
6797   assert(dst != xnoreg, "sanity");
6798   InstructionMark im(this);
6799   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6800   attributes.set_rex_vex_w_reverted();
6801   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6802   // swap src<->dst for encoding
6803   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6804   emit_int8(0x59);
6805   emit_operand(dst, src);
6806 }
6807 
6808 
6809 // scalar single/double precision replicate
6810 
6811 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
6812 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
6813   assert(VM_Version::supports_evex(), "");
6814   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6815   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6816   emit_int8(0x18);
6817   emit_int8((unsigned char)(0xC0 | encode));
6818 }
6819 
6820 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
6821   assert(VM_Version::supports_evex(), "");
6822   assert(dst != xnoreg, "sanity");
6823   InstructionMark im(this);
6824   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6825   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6826   // swap src<->dst for encoding
6827   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6828   emit_int8(0x18);
6829   emit_operand(dst, src);
6830 }
6831 
6832 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
6833 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
6834   assert(VM_Version::supports_evex(), "");
6835   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6836   attributes.set_rex_vex_w_reverted();
6837   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6838   emit_int8(0x19);
6839   emit_int8((unsigned char)(0xC0 | encode));
6840 }
6841 
6842 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
6843   assert(VM_Version::supports_evex(), "");
6844   assert(dst != xnoreg, "sanity");
6845   InstructionMark im(this);
6846   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6847   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6848   attributes.set_rex_vex_w_reverted();
6849   // swap src<->dst for encoding
6850   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6851   emit_int8(0x19);
6852   emit_operand(dst, src);
6853 }
6854 
6855 
6856 // gpr source broadcast forms
6857 
6858 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6859 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
6860   assert(VM_Version::supports_evex(), "");
6861   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6862   attributes.set_is_evex_instruction();
6863   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6864   emit_int8(0x7A);
6865   emit_int8((unsigned char)(0xC0 | encode));
6866 }
6867 
6868 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6869 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
6870   assert(VM_Version::supports_evex(), "");
6871   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6872   attributes.set_is_evex_instruction();
6873   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6874   emit_int8(0x7B);
6875   emit_int8((unsigned char)(0xC0 | encode));
6876 }
6877 
6878 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6879 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
6880   assert(VM_Version::supports_evex(), "");
6881   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6882   attributes.set_is_evex_instruction();
6883   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6884   emit_int8(0x7C);
6885   emit_int8((unsigned char)(0xC0 | encode));
6886 }
6887 
6888 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
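// (same 0x7C opcode as the dword form above; EVEX.W=1 selects quadword elements)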
6889 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
6890   assert(VM_Version::supports_evex(), "");
6891   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6892   attributes.set_is_evex_instruction();
6893   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6894   emit_int8(0x7C);
6895   emit_int8((unsigned char)(0xC0 | encode));
6896 }
6897 
6898 void Assembler::evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len) {
6899   assert(VM_Version::supports_evex(), "");
6900   assert(dst != xnoreg, "sanity");
6901   InstructionMark im(this);
6902   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6903   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6904   attributes.reset_is_clear_context();
6905   attributes.set_embedded_opmask_register_specifier(mask);
6906   attributes.set_is_evex_instruction();
6907   // swap src<->dst for encoding
6908   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6909   emit_int8((unsigned char)0x90);
6910   emit_operand(dst, src);
6911 }
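// Note: AVX-512 gathers consume their opmask: bits of the mask register are
// cleared as elements are loaded, so the mask must be re-materialized before
// the next gather.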
6912 
6913 // Carry-Less Multiplication Quadword
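// imm8 ("mask") bit 0 selects the high or low quadword of the destination
// operand and bit 4 that of the source, e.g. 0x11 multiplies the two high
// quadwords.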
6914 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
6915   assert(VM_Version::supports_clmul(), "");
6916   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6917   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6918   emit_int8(0x44);
6919   emit_int8((unsigned char)(0xC0 | encode));
6920   emit_int8((unsigned char)mask);
6921 }
6922 
6923 // Carry-Less Multiplication Quadword
6924 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
6925   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
6926   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6927   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6928   emit_int8(0x44);
6929   emit_int8((unsigned char)(0xC0 | encode));
6930   emit_int8((unsigned char)mask);
6931 }
6932 
6933 void Assembler::evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len) {
6934   assert(VM_Version::supports_vpclmulqdq(), "Requires vector carryless multiplication support");
6935   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6936   attributes.set_is_evex_instruction();
6937   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6938   emit_int8(0x44);
6939   emit_int8((unsigned char)(0xC0 | encode));
6940   emit_int8((unsigned char)mask);
6941 }
6942 
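// Clearing the upper YMM/ZMM state before calling legacy SSE code avoids the
// AVX-to-SSE transition penalty; if the feature flag is off, nothing is emitted.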
6943 void Assembler::vzeroupper() {
6944   if (VM_Version::supports_vzeroupper()) {
6945     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
6946     (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6947     emit_int8(0x77);
6948   }
6949 }
6950 
6951 #ifndef _LP64
6952 // 32bit only pieces of the assembler
6953 
6954 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
6955   // NO PREFIX AS NEVER 64BIT
6956   InstructionMark im(this);
6957   emit_int8((unsigned char)0x81);
6958   emit_int8((unsigned char)(0xF8 | src1->encoding()));
6959   emit_data(imm32, rspec, 0);
6960 }
6961 
6962 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
6964   InstructionMark im(this);
6965   emit_int8((unsigned char)0x81);
6966   emit_operand(rdi, src1);
6967   emit_data(imm32, rspec, 0);
6968 }
6969 
// The 64-bit cmpxchg (cmpxchg8b, used on 32-bit platforms) compares the value at adr
// with the contents of rdx:rax and, if they are equal, stores rcx:rbx into adr;
// otherwise the value at adr is loaded into rdx:rax. ZF is set if the compared
// values were equal, and cleared otherwise.
6973 void Assembler::cmpxchg8(Address adr) {
6974   InstructionMark im(this);
6975   emit_int8(0x0F);
6976   emit_int8((unsigned char)0xC7);
6977   emit_operand(rcx, adr);
6978 }
6979 
6980 void Assembler::decl(Register dst) {
6981   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_int8(0x48 | dst->encoding());
6983 }
6984 
6985 #endif // _LP64
6986 
// 64-bit code typically doesn't use the x87, but still needs it for the trig functions
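// The register-stack forms below go through emit_farith(b1, b2, i), which emits
// the two bytes b1 and (b2 + i) to address ST(i).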
6988 
6989 void Assembler::fabs() {
6990   emit_int8((unsigned char)0xD9);
6991   emit_int8((unsigned char)0xE1);
6992 }
6993 
6994 void Assembler::fadd(int i) {
6995   emit_farith(0xD8, 0xC0, i);
6996 }
6997 
6998 void Assembler::fadd_d(Address src) {
6999   InstructionMark im(this);
7000   emit_int8((unsigned char)0xDC);
7001   emit_operand32(rax, src);
7002 }
7003 
7004 void Assembler::fadd_s(Address src) {
7005   InstructionMark im(this);
7006   emit_int8((unsigned char)0xD8);
7007   emit_operand32(rax, src);
7008 }
7009 
7010 void Assembler::fadda(int i) {
7011   emit_farith(0xDC, 0xC0, i);
7012 }
7013 
7014 void Assembler::faddp(int i) {
7015   emit_farith(0xDE, 0xC0, i);
7016 }
7017 
7018 void Assembler::fchs() {
7019   emit_int8((unsigned char)0xD9);
7020   emit_int8((unsigned char)0xE0);
7021 }
7022 
7023 void Assembler::fcom(int i) {
7024   emit_farith(0xD8, 0xD0, i);
7025 }
7026 
7027 void Assembler::fcomp(int i) {
7028   emit_farith(0xD8, 0xD8, i);
7029 }
7030 
7031 void Assembler::fcomp_d(Address src) {
7032   InstructionMark im(this);
7033   emit_int8((unsigned char)0xDC);
7034   emit_operand32(rbx, src);
7035 }
7036 
7037 void Assembler::fcomp_s(Address src) {
7038   InstructionMark im(this);
7039   emit_int8((unsigned char)0xD8);
7040   emit_operand32(rbx, src);
7041 }
7042 
7043 void Assembler::fcompp() {
7044   emit_int8((unsigned char)0xDE);
7045   emit_int8((unsigned char)0xD9);
7046 }
7047 
7048 void Assembler::fcos() {
7049   emit_int8((unsigned char)0xD9);
7050   emit_int8((unsigned char)0xFF);
7051 }
7052 
7053 void Assembler::fdecstp() {
7054   emit_int8((unsigned char)0xD9);
7055   emit_int8((unsigned char)0xF6);
7056 }
7057 
7058 void Assembler::fdiv(int i) {
7059   emit_farith(0xD8, 0xF0, i);
7060 }
7061 
7062 void Assembler::fdiv_d(Address src) {
7063   InstructionMark im(this);
7064   emit_int8((unsigned char)0xDC);
7065   emit_operand32(rsi, src);
7066 }
7067 
7068 void Assembler::fdiv_s(Address src) {
7069   InstructionMark im(this);
7070   emit_int8((unsigned char)0xD8);
7071   emit_operand32(rsi, src);
7072 }
7073 
7074 void Assembler::fdiva(int i) {
7075   emit_farith(0xDC, 0xF8, i);
7076 }
7077 
7078 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
7079 //       is erroneous for some of the floating-point instructions below.
7080 
7081 void Assembler::fdivp(int i) {
7082   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
7083 }
7084 
7085 void Assembler::fdivr(int i) {
7086   emit_farith(0xD8, 0xF8, i);
7087 }
7088 
7089 void Assembler::fdivr_d(Address src) {
7090   InstructionMark im(this);
7091   emit_int8((unsigned char)0xDC);
7092   emit_operand32(rdi, src);
7093 }
7094 
7095 void Assembler::fdivr_s(Address src) {
7096   InstructionMark im(this);
7097   emit_int8((unsigned char)0xD8);
7098   emit_operand32(rdi, src);
7099 }
7100 
7101 void Assembler::fdivra(int i) {
7102   emit_farith(0xDC, 0xF0, i);
7103 }
7104 
7105 void Assembler::fdivrp(int i) {
7106   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
7107 }
7108 
7109 void Assembler::ffree(int i) {
7110   emit_farith(0xDD, 0xC0, i);
7111 }
7112 
7113 void Assembler::fild_d(Address adr) {
7114   InstructionMark im(this);
7115   emit_int8((unsigned char)0xDF);
7116   emit_operand32(rbp, adr);
7117 }
7118 
7119 void Assembler::fild_s(Address adr) {
7120   InstructionMark im(this);
7121   emit_int8((unsigned char)0xDB);
7122   emit_operand32(rax, adr);
7123 }
7124 
7125 void Assembler::fincstp() {
7126   emit_int8((unsigned char)0xD9);
7127   emit_int8((unsigned char)0xF7);
7128 }
7129 
7130 void Assembler::finit() {
7131   emit_int8((unsigned char)0x9B);
7132   emit_int8((unsigned char)0xDB);
7133   emit_int8((unsigned char)0xE3);
7134 }
7135 
7136 void Assembler::fist_s(Address adr) {
7137   InstructionMark im(this);
7138   emit_int8((unsigned char)0xDB);
7139   emit_operand32(rdx, adr);
7140 }
7141 
7142 void Assembler::fistp_d(Address adr) {
7143   InstructionMark im(this);
7144   emit_int8((unsigned char)0xDF);
7145   emit_operand32(rdi, adr);
7146 }
7147 
7148 void Assembler::fistp_s(Address adr) {
7149   InstructionMark im(this);
7150   emit_int8((unsigned char)0xDB);
7151   emit_operand32(rbx, adr);
7152 }
7153 
7154 void Assembler::fld1() {
7155   emit_int8((unsigned char)0xD9);
7156   emit_int8((unsigned char)0xE8);
7157 }
7158 
7159 void Assembler::fld_d(Address adr) {
7160   InstructionMark im(this);
7161   emit_int8((unsigned char)0xDD);
7162   emit_operand32(rax, adr);
7163 }
7164 
7165 void Assembler::fld_s(Address adr) {
7166   InstructionMark im(this);
7167   emit_int8((unsigned char)0xD9);
7168   emit_operand32(rax, adr);
7169 }
7170 
7171 
7172 void Assembler::fld_s(int index) {
7173   emit_farith(0xD9, 0xC0, index);
7174 }
7175 
7176 void Assembler::fld_x(Address adr) {
7177   InstructionMark im(this);
7178   emit_int8((unsigned char)0xDB);
7179   emit_operand32(rbp, adr);
7180 }
7181 
7182 void Assembler::fldcw(Address src) {
7183   InstructionMark im(this);
7184   emit_int8((unsigned char)0xD9);
7185   emit_operand32(rbp, src);
7186 }
7187 
7188 void Assembler::fldenv(Address src) {
7189   InstructionMark im(this);
7190   emit_int8((unsigned char)0xD9);
7191   emit_operand32(rsp, src);
7192 }
7193 
7194 void Assembler::fldlg2() {
7195   emit_int8((unsigned char)0xD9);
7196   emit_int8((unsigned char)0xEC);
7197 }
7198 
7199 void Assembler::fldln2() {
7200   emit_int8((unsigned char)0xD9);
7201   emit_int8((unsigned char)0xED);
7202 }
7203 
7204 void Assembler::fldz() {
7205   emit_int8((unsigned char)0xD9);
7206   emit_int8((unsigned char)0xEE);
7207 }
7208 
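// flog computes ln(x) as ln(2) * log2(x): fldln2 pushes ln(2), fxch brings x to
// the top, and fyl2x computes ST(1) * log2(ST(0)) and pops. flog10 is the same
// with log10(2).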
7209 void Assembler::flog() {
7210   fldln2();
7211   fxch();
7212   fyl2x();
7213 }
7214 
7215 void Assembler::flog10() {
7216   fldlg2();
7217   fxch();
7218   fyl2x();
7219 }
7220 
7221 void Assembler::fmul(int i) {
7222   emit_farith(0xD8, 0xC8, i);
7223 }
7224 
7225 void Assembler::fmul_d(Address src) {
7226   InstructionMark im(this);
7227   emit_int8((unsigned char)0xDC);
7228   emit_operand32(rcx, src);
7229 }
7230 
7231 void Assembler::fmul_s(Address src) {
7232   InstructionMark im(this);
7233   emit_int8((unsigned char)0xD8);
7234   emit_operand32(rcx, src);
7235 }
7236 
7237 void Assembler::fmula(int i) {
7238   emit_farith(0xDC, 0xC8, i);
7239 }
7240 
7241 void Assembler::fmulp(int i) {
7242   emit_farith(0xDE, 0xC8, i);
7243 }
7244 
7245 void Assembler::fnsave(Address dst) {
7246   InstructionMark im(this);
7247   emit_int8((unsigned char)0xDD);
7248   emit_operand32(rsi, dst);
7249 }
7250 
7251 void Assembler::fnstcw(Address src) {
7252   InstructionMark im(this);
7253   emit_int8((unsigned char)0x9B);
7254   emit_int8((unsigned char)0xD9);
7255   emit_operand32(rdi, src);
7256 }
7257 
7258 void Assembler::fnstsw_ax() {
7259   emit_int8((unsigned char)0xDF);
7260   emit_int8((unsigned char)0xE0);
7261 }
7262 
7263 void Assembler::fprem() {
7264   emit_int8((unsigned char)0xD9);
7265   emit_int8((unsigned char)0xF8);
7266 }
7267 
7268 void Assembler::fprem1() {
7269   emit_int8((unsigned char)0xD9);
7270   emit_int8((unsigned char)0xF5);
7271 }
7272 
7273 void Assembler::frstor(Address src) {
7274   InstructionMark im(this);
7275   emit_int8((unsigned char)0xDD);
7276   emit_operand32(rsp, src);
7277 }
7278 
7279 void Assembler::fsin() {
7280   emit_int8((unsigned char)0xD9);
7281   emit_int8((unsigned char)0xFE);
7282 }
7283 
7284 void Assembler::fsqrt() {
7285   emit_int8((unsigned char)0xD9);
7286   emit_int8((unsigned char)0xFA);
7287 }
7288 
7289 void Assembler::fst_d(Address adr) {
7290   InstructionMark im(this);
7291   emit_int8((unsigned char)0xDD);
7292   emit_operand32(rdx, adr);
7293 }
7294 
7295 void Assembler::fst_s(Address adr) {
7296   InstructionMark im(this);
7297   emit_int8((unsigned char)0xD9);
7298   emit_operand32(rdx, adr);
7299 }
7300 
7301 void Assembler::fstp_d(Address adr) {
7302   InstructionMark im(this);
7303   emit_int8((unsigned char)0xDD);
7304   emit_operand32(rbx, adr);
7305 }
7306 
7307 void Assembler::fstp_d(int index) {
7308   emit_farith(0xDD, 0xD8, index);
7309 }
7310 
7311 void Assembler::fstp_s(Address adr) {
7312   InstructionMark im(this);
7313   emit_int8((unsigned char)0xD9);
7314   emit_operand32(rbx, adr);
7315 }
7316 
7317 void Assembler::fstp_x(Address adr) {
7318   InstructionMark im(this);
7319   emit_int8((unsigned char)0xDB);
7320   emit_operand32(rdi, adr);
7321 }
7322 
7323 void Assembler::fsub(int i) {
7324   emit_farith(0xD8, 0xE0, i);
7325 }
7326 
7327 void Assembler::fsub_d(Address src) {
7328   InstructionMark im(this);
7329   emit_int8((unsigned char)0xDC);
7330   emit_operand32(rsp, src);
7331 }
7332 
7333 void Assembler::fsub_s(Address src) {
7334   InstructionMark im(this);
7335   emit_int8((unsigned char)0xD8);
7336   emit_operand32(rsp, src);
7337 }
7338 
7339 void Assembler::fsuba(int i) {
7340   emit_farith(0xDC, 0xE8, i);
7341 }
7342 
7343 void Assembler::fsubp(int i) {
7344   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
7345 }
7346 
7347 void Assembler::fsubr(int i) {
7348   emit_farith(0xD8, 0xE8, i);
7349 }
7350 
7351 void Assembler::fsubr_d(Address src) {
7352   InstructionMark im(this);
7353   emit_int8((unsigned char)0xDC);
7354   emit_operand32(rbp, src);
7355 }
7356 
7357 void Assembler::fsubr_s(Address src) {
7358   InstructionMark im(this);
7359   emit_int8((unsigned char)0xD8);
7360   emit_operand32(rbp, src);
7361 }
7362 
7363 void Assembler::fsubra(int i) {
7364   emit_farith(0xDC, 0xE0, i);
7365 }
7366 
7367 void Assembler::fsubrp(int i) {
7368   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
7369 }
7370 
7371 void Assembler::ftan() {
7372   emit_int8((unsigned char)0xD9);
7373   emit_int8((unsigned char)0xF2);
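  // fptan (D9 F2) pushes 1.0 above the result; fstp st(0) (DD D8) pops it,
  // leaving tan of the original ST(0) on top of the stack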
7374   emit_int8((unsigned char)0xDD);
7375   emit_int8((unsigned char)0xD8);
7376 }
7377 
7378 void Assembler::ftst() {
7379   emit_int8((unsigned char)0xD9);
7380   emit_int8((unsigned char)0xE4);
7381 }
7382 
7383 void Assembler::fucomi(int i) {
7384   // make sure the instruction is supported (introduced for P6, together with cmov)
7385   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7386   emit_farith(0xDB, 0xE8, i);
7387 }
7388 
7389 void Assembler::fucomip(int i) {
7390   // make sure the instruction is supported (introduced for P6, together with cmov)
7391   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7392   emit_farith(0xDF, 0xE8, i);
7393 }
7394 
7395 void Assembler::fwait() {
7396   emit_int8((unsigned char)0x9B);
7397 }
7398 
7399 void Assembler::fxch(int i) {
7400   emit_farith(0xD9, 0xC8, i);
7401 }
7402 
7403 void Assembler::fyl2x() {
7404   emit_int8((unsigned char)0xD9);
7405   emit_int8((unsigned char)0xF1);
7406 }
7407 
7408 void Assembler::frndint() {
7409   emit_int8((unsigned char)0xD9);
7410   emit_int8((unsigned char)0xFC);
7411 }
7412 
7413 void Assembler::f2xm1() {
7414   emit_int8((unsigned char)0xD9);
7415   emit_int8((unsigned char)0xF0);
7416 }
7417 
7418 void Assembler::fldl2e() {
7419   emit_int8((unsigned char)0xD9);
7420   emit_int8((unsigned char)0xEA);
7421 }
7422 
7423 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
7424 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
7425 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
7426 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
7427 
7428 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
7429 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7430   if (pre > 0) {
7431     emit_int8(simd_pre[pre]);
7432   }
7433   if (rex_w) {
7434     prefixq(adr, xreg);
7435   } else {
7436     prefix(adr, xreg);
7437   }
7438   if (opc > 0) {
7439     emit_int8(0x0F);
7440     int opc2 = simd_opc[opc];
7441     if (opc2 > 0) {
7442       emit_int8(opc2);
7443     }
7444   }
7445 }
7446 
7447 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7448   if (pre > 0) {
7449     emit_int8(simd_pre[pre]);
7450   }
7451   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
7452   if (opc > 0) {
7453     emit_int8(0x0F);
7454     int opc2 = simd_opc[opc];
7455     if (opc2 > 0) {
7456       emit_int8(opc2);
7457     }
7458   }
7459   return encode;
7460 }
7461 
7462 
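// VEX prefix layout (R, X, B and vvvv are stored inverted, i.e. one's-complemented):
//   2-byte form: 0xC5 | R vvvv L pp |
//   3-byte form: 0xC4 | R X B m-mmmm | W vvvv L pp |
// Worked example, for illustration: vaddpd xmm0, xmm1, xmm2 (VEX.128.66.0F 58 /r)
// fits the 2-byte form and encodes as C5 F1 58 C2 (R=1, vvvv=~0001, L=0, pp=01).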
7463 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
7464   int vector_len = _attributes->get_vector_len();
7465   bool vex_w = _attributes->is_rex_vex_w();
7466   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
7467     prefix(VEX_3bytes);
7468 
7469     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
7470     byte1 = (~byte1) & 0xE0;
7471     byte1 |= opc;
7472     emit_int8(byte1);
7473 
7474     int byte2 = ((~nds_enc) & 0xf) << 3;
7475     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
7476     emit_int8(byte2);
7477   } else {
7478     prefix(VEX_2bytes);
7479 
7480     int byte1 = vex_r ? VEX_R : 0;
7481     byte1 = (~byte1) & 0x80;
7482     byte1 |= ((~nds_enc) & 0xf) << 3;
7483     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
7484     emit_int8(byte1);
7485   }
7486 }
7487 
7488 // This is a 4 byte encoding
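// Layout: 0x62 | P0: R X B R' 0 0 m m | P1: W v v v v 1 p p | P2: z L' L b V' a a a
// (R, X, B, R', vvvv and V' are stored inverted). Worked example, for illustration:
// vaddpd zmm0, zmm1, zmm2 (EVEX.512.66.0F.W1 58 /r) encodes as 62 F1 F5 48 58 C2.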
7489 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
7490   // EVEX 0x62 prefix
7491   prefix(EVEX_4bytes);
7492   bool vex_w = _attributes->is_rex_vex_w();
7493   int evex_encoding = (vex_w ? VEX_W : 0);
7494   // EVEX.b is not currently used for broadcast of single element or data rounding modes
7495   _attributes->set_evex_encoding(evex_encoding);
7496 
  // P0: byte 2 as RXBR'00mm, built from the plain (un-inverted) bits here
  // and one's-complemented below, since EVEX stores R, X, B and R' inverted
7499   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
7500   byte2 = (~byte2) & 0xF0;
7501   // confine opc opcode extensions in mm bits to lower two bits
7502   // of form {0F, 0F_38, 0F_3A}
7503   byte2 |= opc;
7504   emit_int8(byte2);
7505 
7506   // P1: byte 3 as Wvvvv1pp
7507   int byte3 = ((~nds_enc) & 0xf) << 3;
7508   // p[10] is always 1
7509   byte3 |= EVEX_F;
7510   byte3 |= (vex_w & 1) << 7;
7511   // confine pre opcode extensions in pp bits to lower two bits
7512   // of form {66, F3, F2}
7513   byte3 |= pre;
7514   emit_int8(byte3);
7515 
7516   // P2: byte 4 as zL'Lbv'aaa
  // the opmask registers (kregs) are encoded in the low 3 bits as aaa
7518   int byte4 = (_attributes->is_no_reg_mask()) ?
7519               0 :
7520               _attributes->get_embedded_opmask_register_specifier();
  // EVEX.V' for extending EVEX.vvvv or VIDX
  byte4 |= (evex_v ? 0: EVEX_V);
  // third bit is EVEX.b for broadcast actions
7524   byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
7525   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
7526   byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
7527   // last is EVEX.z for zero/merge actions
7528   if (_attributes->is_no_reg_mask() == false) {
7529     byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
7530   }
7531   emit_int8(byte4);
7532 }
7533 
7534 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
  bool vex_r = ((xreg_enc & 8) == 8);
7536   bool vex_b = adr.base_needs_rex();
7537   bool vex_x;
7538   if (adr.isxmmindex()) {
7539     vex_x = adr.xmmindex_needs_rex();
7540   } else {
7541     vex_x = adr.index_needs_rex();
7542   }
7543   set_attributes(attributes);
7544   attributes->set_current_assembler(this);
7545 
  // if the vector-length extension (AVX-512VL) is turned off, revert to AVX for vectors smaller than 512-bit
7547   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7548     switch (attributes->get_vector_len()) {
7549     case AVX_128bit:
7550     case AVX_256bit:
7551       attributes->set_is_legacy_mode();
7552       break;
7553     }
7554   }
7555 
  // For pure EVEX, check whether this instruction is allowed in legacy mode
  // and has resources which will fit in it.  Pure EVEX instructions call
  // set_is_evex_instruction in their definition; otherwise that field is set
  // when we encode to EVEX
7560   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7561       !_is_managed && !attributes->is_evex_instruction()) {
7562     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7563       bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7564       if (check_register_bank) {
7565         // check nds_enc and xreg_enc for upper bank usage
7566         if (nds_enc < 16 && xreg_enc < 16) {
7567           attributes->set_is_legacy_mode();
7568         }
7569       } else {
7570         attributes->set_is_legacy_mode();
7571       }
7572     }
7573   }
7574 
7575   _is_managed = false;
7576   if (UseAVX > 2 && !attributes->is_legacy_mode())
7577   {
7578     bool evex_r = (xreg_enc >= 16);
7579     bool evex_v;
7580     // EVEX.V' is set to true when VSIB is used as we may need to use higher order XMM registers (16-31)
7581     if (adr.isxmmindex())  {
      evex_v = (adr._xmmindex->encoding() > 15);
7583     } else {
7584       evex_v = (nds_enc >= 16);
7585     }
7586     attributes->set_is_evex_instruction();
7587     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7588   } else {
7589     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7590       attributes->set_rex_vex_w(false);
7591     }
7592     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7593   }
7594 }
7595 
7596 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
  bool vex_r = ((dst_enc & 8) == 8);
  bool vex_b = ((src_enc & 8) == 8);
7599   bool vex_x = false;
7600   set_attributes(attributes);
7601   attributes->set_current_assembler(this);
7602   bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7603 
  // if the vector-length extension (AVX-512VL) is turned off, revert to AVX for vectors smaller than 512-bit
7605   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7606     switch (attributes->get_vector_len()) {
7607     case AVX_128bit:
7608     case AVX_256bit:
7609       if (check_register_bank) {
7610         if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
          // an upper-bank register (XMM16-31) requires EVEX, and without
          // AVX-512VL EVEX implies 512-bit, so promote the vector length
          // to honor the register allocator's choice
7612           attributes->set_vector_len(AVX_512bit);
7613         } else {
7614           attributes->set_is_legacy_mode();
7615         }
7616       } else {
7617         attributes->set_is_legacy_mode();
7618       }
7619       break;
7620     }
7621   }
7622 
  // For pure EVEX, check whether this instruction is allowed in legacy mode
  // and has resources which will fit in it.  Pure EVEX instructions call
  // set_is_evex_instruction in their definition; otherwise that field is set
  // when we encode to EVEX
7627   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7628       !_is_managed && !attributes->is_evex_instruction()) {
7629     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7630       if (check_register_bank) {
7631         // check dst_enc, nds_enc and src_enc for upper bank usage
7632         if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
7633           attributes->set_is_legacy_mode();
7634         }
7635       } else {
7636         attributes->set_is_legacy_mode();
7637       }
7638     }
7639   }
7640 
7641   _is_managed = false;
7642   if (UseAVX > 2 && !attributes->is_legacy_mode())
7643   {
7644     bool evex_r = (dst_enc >= 16);
7645     bool evex_v = (nds_enc >= 16);
7646     // can use vex_x as bank extender on rm encoding
7647     vex_x = (src_enc >= 16);
7648     attributes->set_is_evex_instruction();
7649     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7650   } else {
7651     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7652       attributes->set_rex_vex_w(false);
7653     }
7654     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7655   }
7656 
7657   // return modrm byte components for operands
7658   return (((dst_enc & 7) << 3) | (src_enc & 7));
7659 }
7660 
7661 
7662 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
7663                             VexOpcode opc, InstructionAttr *attributes) {
7664   if (UseAVX > 0) {
7665     int xreg_enc = xreg->encoding();
7666     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7667     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
7668   } else {
7669     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
7670     rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
7671   }
7672 }
7673 
7674 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
7675                                       VexOpcode opc, InstructionAttr *attributes) {
7676   int dst_enc = dst->encoding();
7677   int src_enc = src->encoding();
7678   if (UseAVX > 0) {
7679     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7680     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
7681   } else {
7682     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
7683     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
7684   }
7685 }
7686 
7687 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
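  // cop is the comparison predicate, emitted in imm8[3:0]:
  // 0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT, 6=NLE, 7=ORD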
7688   assert(VM_Version::supports_avx(), "");
7689   assert(!VM_Version::supports_evex(), "");
7690   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7691   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7692   emit_int8((unsigned char)0xC2);
7693   emit_int8((unsigned char)(0xC0 | encode));
7694   emit_int8((unsigned char)(0xF & cop));
7695 }
7696 
7697 void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7698   assert(VM_Version::supports_avx(), "");
7699   assert(!VM_Version::supports_evex(), "");
7700   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7701   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7702   emit_int8((unsigned char)0x4B);
7703   emit_int8((unsigned char)(0xC0 | encode));
  int src2_enc = src2->encoding();
  emit_int8((unsigned char)(0xF0 & (src2_enc << 4)));  // src2 selected via imm8[7:4] (VEX /is4 operand)
7706 }
7707 
7708 void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
7709   assert(VM_Version::supports_avx(), "");
7710   assert(!VM_Version::supports_evex(), "");
7711   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7712   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
7713   emit_int8((unsigned char)0xC2);
7714   emit_int8((unsigned char)(0xC0 | encode));
7715   emit_int8((unsigned char)(0xF & cop));
7716 }
7717 
7718 void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7719   assert(VM_Version::supports_avx(), "");
7720   assert(!VM_Version::supports_evex(), "");
7721   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7722   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7723   emit_int8((unsigned char)0x4A);
7724   emit_int8((unsigned char)(0xC0 | encode));
  int src2_enc = src2->encoding();
  emit_int8((unsigned char)(0xF0 & (src2_enc << 4)));  // src2 selected via imm8[7:4] (VEX /is4 operand)
7727 }
7728 
7729 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
7730   assert(VM_Version::supports_avx2(), "");
7731   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7732   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7733   emit_int8((unsigned char)0x02);
7734   emit_int8((unsigned char)(0xC0 | encode));
7735   emit_int8((unsigned char)imm8);
7736 }
7737 
7738 void Assembler::shlxl(Register dst, Register src1, Register src2) {
7739   assert(VM_Version::supports_bmi2(), "");
7740   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7741   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7742   emit_int8((unsigned char)0xF7);
7743   emit_int8((unsigned char)(0xC0 | encode));
7744 }
7745 
7746 void Assembler::shlxq(Register dst, Register src1, Register src2) {
7747   assert(VM_Version::supports_bmi2(), "");
7748   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7749   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7750   emit_int8((unsigned char)0xF7);
7751   emit_int8((unsigned char)(0xC0 | encode));
7752 }
7753 
7754 #ifndef _LP64
7755 
7756 void Assembler::incl(Register dst) {
7757   // Don't use it directly. Use MacroAssembler::incrementl() instead.
7758   emit_int8(0x40 | dst->encoding());
7759 }
7760 
7761 void Assembler::lea(Register dst, Address src) {
7762   leal(dst, src);
7763 }
7764 
7765 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
7766   InstructionMark im(this);
7767   emit_int8((unsigned char)0xC7);
7768   emit_operand(rax, dst);
7769   emit_data((int)imm32, rspec, 0);
7770 }
7771 
7772 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
7773   InstructionMark im(this);
7774   int encode = prefix_and_encode(dst->encoding());
7775   emit_int8((unsigned char)(0xB8 | encode));
7776   emit_data((int)imm32, rspec, 0);
7777 }
7778 
7779 void Assembler::popa() { // 32bit
7780   emit_int8(0x61);
7781 }
7782 
7783 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
7784   InstructionMark im(this);
7785   emit_int8(0x68);
7786   emit_data(imm32, rspec, 0);
7787 }
7788 
7789 void Assembler::pusha() { // 32bit
7790   emit_int8(0x60);
7791 }
7792 
7793 void Assembler::set_byte_if_not_zero(Register dst) {
7794   emit_int8(0x0F);
7795   emit_int8((unsigned char)0x95);
7796   emit_int8((unsigned char)(0xE0 | dst->encoding()));
7797 }
7798 
7799 void Assembler::shldl(Register dst, Register src) {
7800   emit_int8(0x0F);
7801   emit_int8((unsigned char)0xA5);
7802   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7803 }
7804 
// 0F A4 /r ib
7806 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
7807   emit_int8(0x0F);
7808   emit_int8((unsigned char)0xA4);
7809   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7810   emit_int8(imm8);
7811 }
7812 
7813 void Assembler::shrdl(Register dst, Register src) {
7814   emit_int8(0x0F);
7815   emit_int8((unsigned char)0xAD);
7816   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7817 }
7818 
7819 #else // LP64
7820 
7821 void Assembler::set_byte_if_not_zero(Register dst) {
7822   int enc = prefix_and_encode(dst->encoding(), true);
7823   emit_int8(0x0F);
7824   emit_int8((unsigned char)0x95);
7825   emit_int8((unsigned char)(0xE0 | enc));
7826 }
7827 
7828 // 64bit only pieces of the assembler
7829 // This should only be used by 64bit instructions that can use rip-relative
7830 // it cannot be used by instructions that want an immediate value.
7831 
7832 bool Assembler::reachable(AddressLiteral adr) {
7833   int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it
  // will always be reachable.
7837   if (adr.reloc() == relocInfo::none) {
7838     return false;
7839   }
7840   if (adr.reloc() == relocInfo::internal_word_type) {
7841     // This should be rip relative and easily reachable.
7842     return true;
7843   }
7844   if (adr.reloc() == relocInfo::virtual_call_type ||
7845       adr.reloc() == relocInfo::opt_virtual_call_type ||
7846       adr.reloc() == relocInfo::static_call_type ||
7847       adr.reloc() == relocInfo::static_stub_type ) {
7848     // This should be rip relative within the code cache and easily
7849     // reachable until we get huge code caches. (At which point
7850     // ic code is going to have issues).
7851     return true;
7852   }
7853   if (adr.reloc() != relocInfo::external_word_type &&
7854       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
7855       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
7856       adr.reloc() != relocInfo::runtime_call_type ) {
7857     return false;
7858   }
7859 
7860   // Stress the correction code
7861   if (ForceUnreachable) {
7862     // Must be runtimecall reloc, see if it is in the codecache
7863     // Flipping stuff in the codecache to be unreachable causes issues
7864     // with things like inline caches where the additional instructions
7865     // are not handled.
7866     if (CodeCache::find_blob(adr._target) == NULL) {
7867       return false;
7868     }
7869   }
  // For external_word_type/runtime_call_type: if the target is reachable both
  // from where we are now (possibly a temp buffer) and from anywhere we might
  // end up in the codeCache, then we are always reachable.
  // This would have to change, to be more pessimistic, if we ever save/restore
  // shared code.
7875   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
7876   if (!is_simm32(disp)) return false;
7877   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
7878   if (!is_simm32(disp)) return false;
7879 
7880   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
7881 
7882   // Because rip relative is a disp + address_of_next_instruction and we
7883   // don't know the value of address_of_next_instruction we apply a fudge factor
7884   // to make sure we will be ok no matter the size of the instruction we get placed into.
7885   // We don't have to fudge the checks above here because they are already worst case.
7886 
  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, a 4-byte literal
7888   // + 4 because better safe than sorry.
7889   const int fudge = 12 + 4;
7890   if (disp < 0) {
7891     disp -= fudge;
7892   } else {
7893     disp += fudge;
7894   }
7895   return is_simm32(disp);
7896 }
7897 
7898 // Check if the polling page is not reachable from the code cache using rip-relative
7899 // addressing.
7900 bool Assembler::is_polling_page_far() {
7901   intptr_t addr = (intptr_t)os::get_polling_page();
7902   return ForceUnreachable ||
7903          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
7904          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
7905 }
7906 
7907 void Assembler::emit_data64(jlong data,
7908                             relocInfo::relocType rtype,
7909                             int format) {
7910   if (rtype == relocInfo::none) {
7911     emit_int64(data);
7912   } else {
7913     emit_data64(data, Relocation::spec_simple(rtype), format);
7914   }
7915 }
7916 
7917 void Assembler::emit_data64(jlong data,
7918                             RelocationHolder const& rspec,
7919                             int format) {
7920   assert(imm_operand == 0, "default format must be immediate in this file");
7921   assert(imm_operand == format, "must be immediate");
7922   assert(inst_mark() != NULL, "must be inside InstructionMark");
7923   // Do not use AbstractAssembler::relocate, which is not intended for
7924   // embedded words.  Instead, relocate to the enclosing instruction.
7925   code_section()->relocate(inst_mark(), rspec, format);
7926 #ifdef ASSERT
7927   check_relocation(rspec, format);
7928 #endif
7929   emit_int64(data);
7930 }
7931 
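// REX prefix layout: 0100WRXB -- W selects 64-bit operand size, R extends the
// ModRM reg field, X the SIB index field, and B the ModRM rm/base (or opcode
// register) field. Worked example: movq(r8, rax) emits 4C 8B C0 (REX.WR, then
// MOV r64, r/m64, then ModRM). For byte instructions, register encodings 4-7
// select AH..BH unless some REX prefix is present, in which case they select
// SPL..DIL; hence the bare REX (0x40) emitted below for byteinst && reg_enc >= 4.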
7932 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
7933   if (reg_enc >= 8) {
7934     prefix(REX_B);
7935     reg_enc -= 8;
7936   } else if (byteinst && reg_enc >= 4) {
7937     prefix(REX);
7938   }
7939   return reg_enc;
7940 }
7941 
7942 int Assembler::prefixq_and_encode(int reg_enc) {
7943   if (reg_enc < 8) {
7944     prefix(REX_W);
7945   } else {
7946     prefix(REX_WB);
7947     reg_enc -= 8;
7948   }
7949   return reg_enc;
7950 }
7951 
7952 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
7953   if (dst_enc < 8) {
7954     if (src_enc >= 8) {
7955       prefix(REX_B);
7956       src_enc -= 8;
7957     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
7958       prefix(REX);
7959     }
7960   } else {
7961     if (src_enc < 8) {
7962       prefix(REX_R);
7963     } else {
7964       prefix(REX_RB);
7965       src_enc -= 8;
7966     }
7967     dst_enc -= 8;
7968   }
7969   return dst_enc << 3 | src_enc;
7970 }
7971 
7972 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
7973   if (dst_enc < 8) {
7974     if (src_enc < 8) {
7975       prefix(REX_W);
7976     } else {
7977       prefix(REX_WB);
7978       src_enc -= 8;
7979     }
7980   } else {
7981     if (src_enc < 8) {
7982       prefix(REX_WR);
7983     } else {
7984       prefix(REX_WRB);
7985       src_enc -= 8;
7986     }
7987     dst_enc -= 8;
7988   }
7989   return dst_enc << 3 | src_enc;
7990 }
7991 
7992 void Assembler::prefix(Register reg) {
7993   if (reg->encoding() >= 8) {
7994     prefix(REX_B);
7995   }
7996 }
7997 
7998 void Assembler::prefix(Register dst, Register src, Prefix p) {
7999   if (src->encoding() >= 8) {
8000     p = (Prefix)(p | REX_B);
8001   }
8002   if (dst->encoding() >= 8) {
8003     p = (Prefix)( p | REX_R);
8004   }
8005   if (p != Prefix_EMPTY) {
8006     // do not generate an empty prefix
8007     prefix(p);
8008   }
8009 }
8010 
8011 void Assembler::prefix(Register dst, Address adr, Prefix p) {
8012   if (adr.base_needs_rex()) {
8013     if (adr.index_needs_rex()) {
8014       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
8015     } else {
8016       prefix(REX_B);
8017     }
8018   } else {
8019     if (adr.index_needs_rex()) {
8020       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
8021     }
8022   }
8023   if (dst->encoding() >= 8) {
8024     p = (Prefix)(p | REX_R);
8025   }
8026   if (p != Prefix_EMPTY) {
8027     // do not generate an empty prefix
8028     prefix(p);
8029   }
8030 }
8031 
8032 void Assembler::prefix(Address adr) {
8033   if (adr.base_needs_rex()) {
8034     if (adr.index_needs_rex()) {
8035       prefix(REX_XB);
8036     } else {
8037       prefix(REX_B);
8038     }
8039   } else {
8040     if (adr.index_needs_rex()) {
8041       prefix(REX_X);
8042     }
8043   }
8044 }
8045 
8046 void Assembler::prefixq(Address adr) {
8047   if (adr.base_needs_rex()) {
8048     if (adr.index_needs_rex()) {
8049       prefix(REX_WXB);
8050     } else {
8051       prefix(REX_WB);
8052     }
8053   } else {
8054     if (adr.index_needs_rex()) {
8055       prefix(REX_WX);
8056     } else {
8057       prefix(REX_W);
8058     }
8059   }
8060 }
8061 
8062 
8063 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
8064   if (reg->encoding() < 8) {
8065     if (adr.base_needs_rex()) {
8066       if (adr.index_needs_rex()) {
8067         prefix(REX_XB);
8068       } else {
8069         prefix(REX_B);
8070       }
8071     } else {
8072       if (adr.index_needs_rex()) {
8073         prefix(REX_X);
8074       } else if (byteinst && reg->encoding() >= 4 ) {
8075         prefix(REX);
8076       }
8077     }
8078   } else {
8079     if (adr.base_needs_rex()) {
8080       if (adr.index_needs_rex()) {
8081         prefix(REX_RXB);
8082       } else {
8083         prefix(REX_RB);
8084       }
8085     } else {
8086       if (adr.index_needs_rex()) {
8087         prefix(REX_RX);
8088       } else {
8089         prefix(REX_R);
8090       }
8091     }
8092   }
8093 }
8094 
8095 void Assembler::prefixq(Address adr, Register src) {
8096   if (src->encoding() < 8) {
8097     if (adr.base_needs_rex()) {
8098       if (adr.index_needs_rex()) {
8099         prefix(REX_WXB);
8100       } else {
8101         prefix(REX_WB);
8102       }
8103     } else {
8104       if (adr.index_needs_rex()) {
8105         prefix(REX_WX);
8106       } else {
8107         prefix(REX_W);
8108       }
8109     }
8110   } else {
8111     if (adr.base_needs_rex()) {
8112       if (adr.index_needs_rex()) {
8113         prefix(REX_WRXB);
8114       } else {
8115         prefix(REX_WRB);
8116       }
8117     } else {
8118       if (adr.index_needs_rex()) {
8119         prefix(REX_WRX);
8120       } else {
8121         prefix(REX_WR);
8122       }
8123     }
8124   }
8125 }
8126 
8127 void Assembler::prefix(Address adr, XMMRegister reg) {
8128   if (reg->encoding() < 8) {
8129     if (adr.base_needs_rex()) {
8130       if (adr.index_needs_rex()) {
8131         prefix(REX_XB);
8132       } else {
8133         prefix(REX_B);
8134       }
8135     } else {
8136       if (adr.index_needs_rex()) {
8137         prefix(REX_X);
8138       }
8139     }
8140   } else {
8141     if (adr.base_needs_rex()) {
8142       if (adr.index_needs_rex()) {
8143         prefix(REX_RXB);
8144       } else {
8145         prefix(REX_RB);
8146       }
8147     } else {
8148       if (adr.index_needs_rex()) {
8149         prefix(REX_RX);
8150       } else {
8151         prefix(REX_R);
8152       }
8153     }
8154   }
8155 }
8156 
8157 void Assembler::prefixq(Address adr, XMMRegister src) {
8158   if (src->encoding() < 8) {
8159     if (adr.base_needs_rex()) {
8160       if (adr.index_needs_rex()) {
8161         prefix(REX_WXB);
8162       } else {
8163         prefix(REX_WB);
8164       }
8165     } else {
8166       if (adr.index_needs_rex()) {
8167         prefix(REX_WX);
8168       } else {
8169         prefix(REX_W);
8170       }
8171     }
8172   } else {
8173     if (adr.base_needs_rex()) {
8174       if (adr.index_needs_rex()) {
8175         prefix(REX_WRXB);
8176       } else {
8177         prefix(REX_WRB);
8178       }
8179     } else {
8180       if (adr.index_needs_rex()) {
8181         prefix(REX_WRX);
8182       } else {
8183         prefix(REX_WR);
8184       }
8185     }
8186   }
8187 }
8188 
8189 void Assembler::adcq(Register dst, int32_t imm32) {
8190   (void) prefixq_and_encode(dst->encoding());
8191   emit_arith(0x81, 0xD0, dst, imm32);
8192 }
8193 
8194 void Assembler::adcq(Register dst, Address src) {
8195   InstructionMark im(this);
8196   prefixq(src, dst);
8197   emit_int8(0x13);
8198   emit_operand(dst, src);
8199 }
8200 
8201 void Assembler::adcq(Register dst, Register src) {
8202   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8203   emit_arith(0x13, 0xC0, dst, src);
8204 }
8205 
8206 void Assembler::addq(Address dst, int32_t imm32) {
8207   InstructionMark im(this);
8208   prefixq(dst);
8209   emit_arith_operand(0x81, rax, dst,imm32);
8210 }
8211 
8212 void Assembler::addq(Address dst, Register src) {
8213   InstructionMark im(this);
8214   prefixq(dst, src);
8215   emit_int8(0x01);
8216   emit_operand(src, dst);
8217 }
8218 
8219 void Assembler::addq(Register dst, int32_t imm32) {
8220   (void) prefixq_and_encode(dst->encoding());
8221   emit_arith(0x81, 0xC0, dst, imm32);
8222 }
8223 
8224 void Assembler::addq(Register dst, Address src) {
8225   InstructionMark im(this);
8226   prefixq(src, dst);
8227   emit_int8(0x03);
8228   emit_operand(dst, src);
8229 }
8230 
8231 void Assembler::addq(Register dst, Register src) {
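  // e.g. addq(rax, rbx) emits 48 03 C3: REX.W, ADD r64 r/m64 (0x03), ModRM 0xC3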
8232   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8233   emit_arith(0x03, 0xC0, dst, src);
8234 }
8235 
8236 void Assembler::adcxq(Register dst, Register src) {
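  // adcx (66 0F 38 F6 /r) is an add-with-carry that reads and writes only CF,
  // while the F3-prefixed twin adox below uses only OF; the pair lets two
  // independent carry chains be interleaved (e.g. in multi-word multiplies)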
8237   //assert(VM_Version::supports_adx(), "adx instructions not supported");
8238   emit_int8((unsigned char)0x66);
8239   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8240   emit_int8(0x0F);
8241   emit_int8(0x38);
8242   emit_int8((unsigned char)0xF6);
8243   emit_int8((unsigned char)(0xC0 | encode));
8244 }
8245 
8246 void Assembler::adoxq(Register dst, Register src) {
8247   //assert(VM_Version::supports_adx(), "adx instructions not supported");
8248   emit_int8((unsigned char)0xF3);
8249   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8250   emit_int8(0x0F);
8251   emit_int8(0x38);
8252   emit_int8((unsigned char)0xF6);
8253   emit_int8((unsigned char)(0xC0 | encode));
8254 }
8255 
8256 void Assembler::andq(Address dst, int32_t imm32) {
8257   InstructionMark im(this);
8258   prefixq(dst);
8259   emit_int8((unsigned char)0x81);
8260   emit_operand(rsp, dst, 4);
8261   emit_int32(imm32);
8262 }
8263 
8264 void Assembler::andq(Register dst, int32_t imm32) {
8265   (void) prefixq_and_encode(dst->encoding());
8266   emit_arith(0x81, 0xE0, dst, imm32);
8267 }
8268 
8269 void Assembler::andq(Register dst, Address src) {
8270   InstructionMark im(this);
8271   prefixq(src, dst);
8272   emit_int8(0x23);
8273   emit_operand(dst, src);
8274 }
8275 
8276 void Assembler::andq(Register dst, Register src) {
8277   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8278   emit_arith(0x23, 0xC0, dst, src);
8279 }
8280 
8281 void Assembler::andnq(Register dst, Register src1, Register src2) {
8282   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8283   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8284   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8285   emit_int8((unsigned char)0xF2);
8286   emit_int8((unsigned char)(0xC0 | encode));
8287 }
8288 
8289 void Assembler::andnq(Register dst, Register src1, Address src2) {
8290   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8291   InstructionMark im(this);
8292   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8293   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8294   emit_int8((unsigned char)0xF2);
8295   emit_operand(dst, src2);
8296 }
8297 
8298 void Assembler::bsfq(Register dst, Register src) {
8299   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8300   emit_int8(0x0F);
8301   emit_int8((unsigned char)0xBC);
8302   emit_int8((unsigned char)(0xC0 | encode));
8303 }
8304 
8305 void Assembler::bsrq(Register dst, Register src) {
8306   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8307   emit_int8(0x0F);
8308   emit_int8((unsigned char)0xBD);
8309   emit_int8((unsigned char)(0xC0 | encode));
8310 }
8311 
8312 void Assembler::bswapq(Register reg) {
8313   int encode = prefixq_and_encode(reg->encoding());
8314   emit_int8(0x0F);
8315   emit_int8((unsigned char)(0xC8 | encode));
8316 }
8317 
8318 void Assembler::blsiq(Register dst, Register src) {
8319   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8320   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8321   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8322   emit_int8((unsigned char)0xF3);
8323   emit_int8((unsigned char)(0xC0 | encode));
8324 }
8325 
8326 void Assembler::blsiq(Register dst, Address src) {
8327   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8328   InstructionMark im(this);
8329   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8330   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8331   emit_int8((unsigned char)0xF3);
8332   emit_operand(rbx, src);
8333 }
8334 
8335 void Assembler::blsmskq(Register dst, Register src) {
8336   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8337   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8338   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8339   emit_int8((unsigned char)0xF3);
8340   emit_int8((unsigned char)(0xC0 | encode));
8341 }
8342 
8343 void Assembler::blsmskq(Register dst, Address src) {
8344   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8345   InstructionMark im(this);
8346   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8347   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8348   emit_int8((unsigned char)0xF3);
8349   emit_operand(rdx, src);
8350 }
8351 
8352 void Assembler::blsrq(Register dst, Register src) {
8353   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8354   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8355   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8356   emit_int8((unsigned char)0xF3);
8357   emit_int8((unsigned char)(0xC0 | encode));
8358 }
8359 
8360 void Assembler::blsrq(Register dst, Address src) {
8361   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8362   InstructionMark im(this);
8363   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8364   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8365   emit_int8((unsigned char)0xF3);
8366   emit_operand(rcx, src);
8367 }
8368 
8369 void Assembler::cdqq() {
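  // REX.W + 99 is CQO: sign-extends rax into rdx:rax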
8370   prefix(REX_W);
8371   emit_int8((unsigned char)0x99);
8372 }
8373 
8374 void Assembler::clflush(Address adr) {
8375   prefix(adr);
8376   emit_int8(0x0F);
8377   emit_int8((unsigned char)0xAE);
8378   emit_operand(rdi, adr);
8379 }
8380 
8381 void Assembler::cmovq(Condition cc, Register dst, Register src) {
8382   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8383   emit_int8(0x0F);
8384   emit_int8(0x40 | cc);
8385   emit_int8((unsigned char)(0xC0 | encode));
8386 }
8387 
8388 void Assembler::cmovq(Condition cc, Register dst, Address src) {
8389   InstructionMark im(this);
8390   prefixq(src, dst);
8391   emit_int8(0x0F);
8392   emit_int8(0x40 | cc);
8393   emit_operand(dst, src);
8394 }
8395 
8396 void Assembler::cmpq(Address dst, int32_t imm32) {
8397   InstructionMark im(this);
8398   prefixq(dst);
8399   emit_int8((unsigned char)0x81);
8400   emit_operand(rdi, dst, 4);
8401   emit_int32(imm32);
8402 }
8403 
8404 void Assembler::cmpq(Register dst, int32_t imm32) {
8405   (void) prefixq_and_encode(dst->encoding());
8406   emit_arith(0x81, 0xF8, dst, imm32);
8407 }
8408 
void Assembler::cmpq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x39);  // CMP r/m64, r64 -- sets flags from dst - src
                    // (0x3B is the reversed form, CMP r64, r/m64)
  emit_operand(src, dst);
}
8415 
8416 void Assembler::cmpq(Register dst, Register src) {
8417   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8418   emit_arith(0x3B, 0xC0, dst, src);
8419 }
8420 
8421 void Assembler::cmpq(Register dst, Address  src) {
8422   InstructionMark im(this);
8423   prefixq(src, dst);
8424   emit_int8(0x3B);
8425   emit_operand(dst, src);
8426 }
8427 
8428 void Assembler::cmpxchgq(Register reg, Address adr) {
8429   InstructionMark im(this);
8430   prefixq(adr, reg);
8431   emit_int8(0x0F);
8432   emit_int8((unsigned char)0xB1);
8433   emit_operand(reg, adr);
8434 }
8435 
8436 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
8437   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8438   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8439   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8440   emit_int8(0x2A);
8441   emit_int8((unsigned char)(0xC0 | encode));
8442 }
8443 
8444 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
8445   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8446   InstructionMark im(this);
8447   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8448   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8449   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8450   emit_int8(0x2A);
8451   emit_operand(dst, src);
8452 }
8453 
8454 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
8455   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8456   InstructionMark im(this);
8457   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8458   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8459   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8460   emit_int8(0x2A);
8461   emit_operand(dst, src);
8462 }
8463 
8464 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
8465   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8466   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8467   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8468   emit_int8(0x2C);
8469   emit_int8((unsigned char)(0xC0 | encode));
8470 }
8471 
8472 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
8473   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8474   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8475   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8476   emit_int8(0x2C);
8477   emit_int8((unsigned char)(0xC0 | encode));
8478 }
8479 
8480 void Assembler::decl(Register dst) {
8481   // Don't use it directly. Use MacroAssembler::decrementl() instead.
8482   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8483   int encode = prefix_and_encode(dst->encoding());
8484   emit_int8((unsigned char)0xFF);
8485   emit_int8((unsigned char)(0xC8 | encode));
8486 }
8487 
8488 void Assembler::decq(Register dst) {
8489   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
8494 }
8495 
8496 void Assembler::decq(Address dst) {
8497   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8498   InstructionMark im(this);
8499   prefixq(dst);
8500   emit_int8((unsigned char)0xFF);
8501   emit_operand(rcx, dst);
8502 }
8503 
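// The memory forms of the 0F AE group encode the operation in the ModRM reg
// field: /0 fxsave, /1 fxrstor, /4 xsave, /5 xrstor -- hence the
// as_Register(0|1|4|5) pseudo-operands below.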
8504 void Assembler::fxrstor(Address src) {
8505   prefixq(src);
8506   emit_int8(0x0F);
8507   emit_int8((unsigned char)0xAE);
8508   emit_operand(as_Register(1), src);
8509 }
8510 
8511 void Assembler::xrstor(Address src) {
8512   prefixq(src);
8513   emit_int8(0x0F);
8514   emit_int8((unsigned char)0xAE);
8515   emit_operand(as_Register(5), src);
8516 }
8517 
8518 void Assembler::fxsave(Address dst) {
8519   prefixq(dst);
8520   emit_int8(0x0F);
8521   emit_int8((unsigned char)0xAE);
8522   emit_operand(as_Register(0), dst);
8523 }
8524 
8525 void Assembler::xsave(Address dst) {
8526   prefixq(dst);
8527   emit_int8(0x0F);
8528   emit_int8((unsigned char)0xAE);
8529   emit_operand(as_Register(4), dst);
8530 }
8531 
8532 void Assembler::idivq(Register src) {
8533   int encode = prefixq_and_encode(src->encoding());
8534   emit_int8((unsigned char)0xF7);
8535   emit_int8((unsigned char)(0xF8 | encode));
8536 }
8537 
8538 void Assembler::imulq(Register dst, Register src) {
8539   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8540   emit_int8(0x0F);
8541   emit_int8((unsigned char)0xAF);
8542   emit_int8((unsigned char)(0xC0 | encode));
8543 }
8544 
8545 void Assembler::imulq(Register dst, Register src, int value) {
8546   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8547   if (is8bit(value)) {
8548     emit_int8(0x6B);
8549     emit_int8((unsigned char)(0xC0 | encode));
8550     emit_int8(value & 0xFF);
8551   } else {
8552     emit_int8(0x69);
8553     emit_int8((unsigned char)(0xC0 | encode));
8554     emit_int32(value);
8555   }
8556 }
8557 
8558 void Assembler::imulq(Register dst, Address src) {
8559   InstructionMark im(this);
8560   prefixq(src, dst);
8561   emit_int8(0x0F);
8562   emit_int8((unsigned char) 0xAF);
8563   emit_operand(dst, src);
8564 }
8565 
8566 void Assembler::incl(Register dst) {
8567   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8569   int encode = prefix_and_encode(dst->encoding());
8570   emit_int8((unsigned char)0xFF);
8571   emit_int8((unsigned char)(0xC0 | encode));
8572 }
8573 
8574 void Assembler::incq(Register dst) {
8575   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8577   int encode = prefixq_and_encode(dst->encoding());
8578   emit_int8((unsigned char)0xFF);
8579   emit_int8((unsigned char)(0xC0 | encode));
8580 }
8581 
8582 void Assembler::incq(Address dst) {
8583   // Don't use it directly. Use MacroAssembler::incrementq() instead.
8584   InstructionMark im(this);
8585   prefixq(dst);
8586   emit_int8((unsigned char)0xFF);
8587   emit_operand(rax, dst);
8588 }
8589 
8590 void Assembler::lea(Register dst, Address src) {
8591   leaq(dst, src);
8592 }
8593 
8594 void Assembler::leaq(Register dst, Address src) {
8595   InstructionMark im(this);
8596   prefixq(src, dst);
8597   emit_int8((unsigned char)0x8D);
8598   emit_operand(dst, src);
8599 }
8600 
8601 void Assembler::mov64(Register dst, int64_t imm64) {
8602   InstructionMark im(this);
8603   int encode = prefixq_and_encode(dst->encoding());
8604   emit_int8((unsigned char)(0xB8 | encode));
8605   emit_int64(imm64);
8606 }
8607 
8608 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
8609   InstructionMark im(this);
8610   int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
8612   emit_data64(imm64, rspec);
8613 }
8614 
8615 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
8616   InstructionMark im(this);
8617   int encode = prefix_and_encode(dst->encoding());
8618   emit_int8((unsigned char)(0xB8 | encode));
8619   emit_data((int)imm32, rspec, narrow_oop_operand);
8620 }
8621 
8622 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
8623   InstructionMark im(this);
8624   prefix(dst);
8625   emit_int8((unsigned char)0xC7);
8626   emit_operand(rax, dst, 4);
8627   emit_data((int)imm32, rspec, narrow_oop_operand);
8628 }
8629 
8630 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
8631   InstructionMark im(this);
8632   int encode = prefix_and_encode(src1->encoding());
8633   emit_int8((unsigned char)0x81);
8634   emit_int8((unsigned char)(0xF8 | encode));
8635   emit_data((int)imm32, rspec, narrow_oop_operand);
8636 }
8637 
8638 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
8639   InstructionMark im(this);
8640   prefix(src1);
8641   emit_int8((unsigned char)0x81);
8642   emit_operand(rax, src1, 4);
8643   emit_data((int)imm32, rspec, narrow_oop_operand);
8644 }
8645 
8646 void Assembler::lzcntq(Register dst, Register src) {
8647   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
8648   emit_int8((unsigned char)0xF3);
8649   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8650   emit_int8(0x0F);
8651   emit_int8((unsigned char)0xBD);
8652   emit_int8((unsigned char)(0xC0 | encode));
8653 }
8654 
8655 void Assembler::movdq(XMMRegister dst, Register src) {
8656   // table D-1 says MMX/SSE2
8657   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8658   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8659   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8660   emit_int8(0x6E);
8661   emit_int8((unsigned char)(0xC0 | encode));
8662 }
8663 
8664 void Assembler::movdq(Register dst, XMMRegister src) {
8665   // table D-1 says MMX/SSE2
8666   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8667   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8668   // swap src/dst to get correct prefix
8669   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8670   emit_int8(0x7E);
8671   emit_int8((unsigned char)(0xC0 | encode));
8672 }
8673 
8674 void Assembler::movq(Register dst, Register src) {
8675   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8676   emit_int8((unsigned char)0x8B);
8677   emit_int8((unsigned char)(0xC0 | encode));
8678 }
8679 
8680 void Assembler::movq(Register dst, Address src) {
8681   InstructionMark im(this);
8682   prefixq(src, dst);
8683   emit_int8((unsigned char)0x8B);
8684   emit_operand(dst, src);
8685 }
8686 
8687 void Assembler::movq(Address dst, Register src) {
8688   InstructionMark im(this);
8689   prefixq(dst, src);
8690   emit_int8((unsigned char)0x89);
8691   emit_operand(src, dst);
8692 }
8693 
8694 void Assembler::movsbq(Register dst, Address src) {
8695   InstructionMark im(this);
8696   prefixq(src, dst);
8697   emit_int8(0x0F);
8698   emit_int8((unsigned char)0xBE);
8699   emit_operand(dst, src);
8700 }
8701 
8702 void Assembler::movsbq(Register dst, Register src) {
8703   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8704   emit_int8(0x0F);
8705   emit_int8((unsigned char)0xBE);
8706   emit_int8((unsigned char)(0xC0 | encode));
8707 }
8708 
8709 void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3) as movl     $0x0000000048000000,(%rbx) -- no ModRM byte
  // is emitted below, so we shouldn't use this until it's been tested at runtime...
8713   ShouldNotReachHere();
8714   InstructionMark im(this);
8715   int encode = prefixq_and_encode(dst->encoding());
8716   emit_int8((unsigned char)(0xC7 | encode));
8717   emit_int32(imm32);
8718 }
8719 
8720 void Assembler::movslq(Address dst, int32_t imm32) {
8721   assert(is_simm32(imm32), "lost bits");
8722   InstructionMark im(this);
8723   prefixq(dst);
8724   emit_int8((unsigned char)0xC7);
8725   emit_operand(rax, dst, 4);
8726   emit_int32(imm32);
8727 }
8728 
8729 void Assembler::movslq(Register dst, Address src) {
8730   InstructionMark im(this);
8731   prefixq(src, dst);
8732   emit_int8(0x63);
8733   emit_operand(dst, src);
8734 }
8735 
8736 void Assembler::movslq(Register dst, Register src) {
8737   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8738   emit_int8(0x63);
8739   emit_int8((unsigned char)(0xC0 | encode));
8740 }
8741 
8742 void Assembler::movswq(Register dst, Address src) {
8743   InstructionMark im(this);
8744   prefixq(src, dst);
8745   emit_int8(0x0F);
8746   emit_int8((unsigned char)0xBF);
8747   emit_operand(dst, src);
8748 }
8749 
8750 void Assembler::movswq(Register dst, Register src) {
8751   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8752   emit_int8((unsigned char)0x0F);
8753   emit_int8((unsigned char)0xBF);
8754   emit_int8((unsigned char)(0xC0 | encode));
8755 }
8756 
8757 void Assembler::movzbq(Register dst, Address src) {
8758   InstructionMark im(this);
8759   prefixq(src, dst);
8760   emit_int8((unsigned char)0x0F);
8761   emit_int8((unsigned char)0xB6);
8762   emit_operand(dst, src);
8763 }
8764 
8765 void Assembler::movzbq(Register dst, Register src) {
8766   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8767   emit_int8(0x0F);
8768   emit_int8((unsigned char)0xB6);
  emit_int8((unsigned char)(0xC0 | encode));
8770 }
8771 
8772 void Assembler::movzwq(Register dst, Address src) {
8773   InstructionMark im(this);
8774   prefixq(src, dst);
8775   emit_int8((unsigned char)0x0F);
8776   emit_int8((unsigned char)0xB7);
8777   emit_operand(dst, src);
8778 }
8779 
8780 void Assembler::movzwq(Register dst, Register src) {
8781   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8782   emit_int8((unsigned char)0x0F);
8783   emit_int8((unsigned char)0xB7);
8784   emit_int8((unsigned char)(0xC0 | encode));
8785 }
8786 
8787 void Assembler::mulq(Address src) {
8788   InstructionMark im(this);
8789   prefixq(src);
8790   emit_int8((unsigned char)0xF7);
8791   emit_operand(rsp, src);
8792 }
8793 
8794 void Assembler::mulq(Register src) {
8795   int encode = prefixq_and_encode(src->encoding());
8796   emit_int8((unsigned char)0xF7);
8797   emit_int8((unsigned char)(0xE0 | encode));
8798 }
8799 
8800 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
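  // unsigned rdx * src without touching the flags: dst1 (ModRM.reg) receives
  // the high 64 bits of the product, dst2 (vvvv) the low 64 bits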
8801   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8802   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8803   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
8804   emit_int8((unsigned char)0xF6);
8805   emit_int8((unsigned char)(0xC0 | encode));
8806 }
8807 
8808 void Assembler::negq(Register dst) {
8809   int encode = prefixq_and_encode(dst->encoding());
8810   emit_int8((unsigned char)0xF7);
8811   emit_int8((unsigned char)(0xD8 | encode));
8812 }
8813 
8814 void Assembler::notq(Register dst) {
8815   int encode = prefixq_and_encode(dst->encoding());
8816   emit_int8((unsigned char)0xF7);
8817   emit_int8((unsigned char)(0xD0 | encode));
8818 }
8819 
8820 void Assembler::orq(Address dst, int32_t imm32) {
8821   InstructionMark im(this);
8822   prefixq(dst);
8823   emit_int8((unsigned char)0x81);
8824   emit_operand(rcx, dst, 4);
8825   emit_int32(imm32);
8826 }
8827 
8828 void Assembler::orq(Register dst, int32_t imm32) {
8829   (void) prefixq_and_encode(dst->encoding());
8830   emit_arith(0x81, 0xC8, dst, imm32);
8831 }
8832 
8833 void Assembler::orq(Register dst, Address src) {
8834   InstructionMark im(this);
8835   prefixq(src, dst);
8836   emit_int8(0x0B);
8837   emit_operand(dst, src);
8838 }
8839 
8840 void Assembler::orq(Register dst, Register src) {
8841   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8842   emit_arith(0x0B, 0xC0, dst, src);
8843 }
8844 
8845 void Assembler::popa() { // 64bit
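  // restores the frame laid out by pusha() below; the two must stay in sync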
8846   movq(r15, Address(rsp, 0));
8847   movq(r14, Address(rsp, wordSize));
8848   movq(r13, Address(rsp, 2 * wordSize));
8849   movq(r12, Address(rsp, 3 * wordSize));
8850   movq(r11, Address(rsp, 4 * wordSize));
8851   movq(r10, Address(rsp, 5 * wordSize));
8852   movq(r9,  Address(rsp, 6 * wordSize));
8853   movq(r8,  Address(rsp, 7 * wordSize));
8854   movq(rdi, Address(rsp, 8 * wordSize));
8855   movq(rsi, Address(rsp, 9 * wordSize));
8856   movq(rbp, Address(rsp, 10 * wordSize));
8857   // skip rsp
8858   movq(rbx, Address(rsp, 12 * wordSize));
8859   movq(rdx, Address(rsp, 13 * wordSize));
8860   movq(rcx, Address(rsp, 14 * wordSize));
8861   movq(rax, Address(rsp, 15 * wordSize));
8862 
8863   addq(rsp, 16 * wordSize);
8864 }
8865 
8866 void Assembler::popcntq(Register dst, Address src) {
8867   assert(VM_Version::supports_popcnt(), "must support");
8868   InstructionMark im(this);
8869   emit_int8((unsigned char)0xF3);
8870   prefixq(src, dst);
8871   emit_int8((unsigned char)0x0F);
8872   emit_int8((unsigned char)0xB8);
8873   emit_operand(dst, src);
8874 }
8875 
8876 void Assembler::popcntq(Register dst, Register src) {
8877   assert(VM_Version::supports_popcnt(), "must support");
8878   emit_int8((unsigned char)0xF3);
8879   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8880   emit_int8((unsigned char)0x0F);
8881   emit_int8((unsigned char)0xB8);
8882   emit_int8((unsigned char)(0xC0 | encode));
8883 }
8884 
8885 void Assembler::popq(Address dst) {
8886   InstructionMark im(this);
8887   prefixq(dst);
8888   emit_int8((unsigned char)0x8F);
8889   emit_operand(rax, dst);
8890 }
8891 
8892 void Assembler::pusha() { // 64bit
  // We have to store the original rsp.  The ABI says that the 128 bytes
  // below rsp (the red zone) are local scratch, so storing at -5 * wordSize
  // is safe; after the subq below, that slot becomes offset 11 * wordSize,
  // the slot popa() skips.
8895   movq(Address(rsp, -5 * wordSize), rsp);
8896 
8897   subq(rsp, 16 * wordSize);
8898 
8899   movq(Address(rsp, 15 * wordSize), rax);
8900   movq(Address(rsp, 14 * wordSize), rcx);
8901   movq(Address(rsp, 13 * wordSize), rdx);
8902   movq(Address(rsp, 12 * wordSize), rbx);
8903   // skip rsp
8904   movq(Address(rsp, 10 * wordSize), rbp);
8905   movq(Address(rsp, 9 * wordSize), rsi);
8906   movq(Address(rsp, 8 * wordSize), rdi);
8907   movq(Address(rsp, 7 * wordSize), r8);
8908   movq(Address(rsp, 6 * wordSize), r9);
8909   movq(Address(rsp, 5 * wordSize), r10);
8910   movq(Address(rsp, 4 * wordSize), r11);
8911   movq(Address(rsp, 3 * wordSize), r12);
8912   movq(Address(rsp, 2 * wordSize), r13);
8913   movq(Address(rsp, wordSize), r14);
8914   movq(Address(rsp, 0), r15);
8915 }
8916 
8917 void Assembler::pushq(Address src) {
8918   InstructionMark im(this);
8919   prefixq(src);
8920   emit_int8((unsigned char)0xFF);
8921   emit_operand(rsi, src);
8922 }
8923 
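// Group-2 rotate encodings: ROR is /1, RCL is /2, RCR is /3.  A count of 1
// uses the short form D1 /digit; any other count uses C1 /digit ib.  The
// asserts check isShiftCount(imm8 >> 1) so that 64-bit rotate counts up to
// 63 can pass a helper that was written for 32-bit counts.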
void Assembler::rclq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xD0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD0 | encode));
    emit_int8(imm8);
  }
}

void Assembler::rcrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xD8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD8 | encode));
    emit_int8(imm8);
  }
}

void Assembler::rorq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xC8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xC8 | encode));
    emit_int8(imm8);
  }
}

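// RORX (BMI2) rotates right by an immediate without reading or updating any
// flags, which lets it be scheduled freely.  It is VEX-encoded
// (F2 0F3A F0 /r ib); vex_w selects the 64-bit form (rorxq) versus the
// 32-bit form (rorxd).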
void Assembler::rorxq(Register dst, Register src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0xF0);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}

void Assembler::rorxd(Register dst, Register src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0xF0);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}

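// SAR is group 2 /7: it shifts right while replicating the sign bit, i.e. a
// signed division by a power of two that rounds toward negative infinity.
// Illustrative encoding: sarq(rax, 1) emits 48 D1 F8.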
void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xF8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xF8 | encode));
    emit_int8(imm8);
  }
}

void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xF8 | encode));
}

void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}

void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x1B);
  emit_operand(dst, src);
}

void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

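// SHL is group 2 /4, with the same D1 shortening for a count of 1 as the
// rotates above.  Illustrative encoding: shlq(rcx, 3) emits 48 C1 E1 03.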
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xE0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xE0 | encode));
    emit_int8(imm8);
  }
}

void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE0 | encode));
}

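// SHR is group 2 /5.  Unlike shlq above, the immediate form below always
// emits C1 /5 ib, even for a count of 1; the no-immediate form shifts by CL.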
void Assembler::shrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xC1);
  emit_int8((unsigned char)(0xE8 | encode));
  emit_int8(imm8);
}

void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
}

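// SUB uses /5 (rbp as the reg-field placeholder) for its immediate forms.
// subq_imm32 keeps the full 4-byte immediate even when it would fit in a
// byte, presumably so that the instruction length stays fixed for code that
// is measured or patched later.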
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x29);
  emit_operand(src, dst);
}

void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}

// Force generation of a 4-byte immediate value even if it fits into 8 bits
void Assembler::subq_imm32(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}

void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x2B);
  emit_operand(dst, src);
}

void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

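// When the destination is rax, TEST can use the dedicated A9 encoding
// (TEST RAX, imm32), which is one byte shorter than the generic F7 /0 form.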
void Assembler::testq(Register dst, int32_t imm32) {
  // Not using emit_arith because TEST has no sign-extended
  // 8-bit immediate form.
  int encode = dst->encoding();
  if (encode == 0) {
    prefix(REX_W);
    emit_int8((unsigned char)0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode));
  }
  emit_int32(imm32);
}

void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

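// XADD (0F C1 /r) stores dst + src into the memory operand and hands the old
// memory value back in src.  Callers that need atomicity are expected to
// emit a lock() prefix immediately before it.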
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}

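// XCHG with a memory operand is architecturally locked even without an
// explicit LOCK prefix.  The register-register form always uses 87 /r here;
// the shorter 90+rd encoding (XCHG with rax) is not special-cased.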
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}

void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
}

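// XOR r64, r/m64 is 0x33 /r.  xorq(dst, dst) clears dst, though the 32-bit
// xorl idiom would do the same, since 32-bit results zero-extend to 64 bits.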
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x33);
  emit_operand(dst, src);
}

#endif // !LP64