/*
 * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Implementation of AddressLiteral

// A 2-D table for managing compressed displacement (disp8) on EVEX enabled platforms.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
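
// Worked example (illustrative only, not part of the build): each row above
// gives the disp8 compression factor N for the AVX_128bit, AVX_256bit and
// AVX_512bit columns. A full-vector (EVEX_FV(0)) 512-bit memory operand such
// as [rax + 256] uses N = 64, so the displacement is emitted as the single
// byte 256 / 64 = 4. A minimal sketch of the rule, assuming the table above:
//   int N = tuple_table[tuple_type + mod_idx][vector_len];
//   if (disp % N == 0 && is8bit(disp / N)) { /* emit disp8 of disp / N */ }
//   else                                   { /* emit full disp32       */ }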

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  _xmmindex = xnoreg;
  _isxmmindex = false;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
  _xmmindex = xnoreg;
  _isxmmindex = false;
}

#endif // _LP64


// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}
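
// For example (illustrative): make_raw(rax->encoding(), rsp->encoding(),
// Address::times_1, 16, relocInfo::none) yields the operand [rax + 16] with
// no index register, since a raw index encoding of 4 (rsp) means "no index".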

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}
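
// For example (illustrative): encode(rcx) == 1 and encode(r9) == 1 as well;
// the high bit of an extended register such as r9 (encoding 9) is not lost,
// it is carried by a REX prefix bit (REX.B/X/R) rather than by the 3-bit
// ModRM/SIB field built from this value.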

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int24(op1, (op2 | encode(dst)), imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int24(op1 | 0x02,        // set sign bit
               op2 | encode(dst),
               imm32 & 0xFF);
  } else {
    emit_int16(op1, (op2 | encode(dst)));
    emit_int32(imm32);
  }
}
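
// Example (illustrative): emit_arith(0x81, 0xC0, rcx, 5) emits 83 C1 05 --
// setting the sign-extension bit turns opcode 0x81 into 0x83 and shrinks the
// immediate to one byte -- whereas emit_arith(0x81, 0xC0, rcx, 0x12345)
// emits 81 C1 45 23 01 00 with a full little-endian imm32.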

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int16(op1, (op2 | encode(dst)));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int16(op1, (op2 | encode(dst) << 3 | encode(src)));
}

bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && is_evex_inst) {
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if ((-0x80 <= new_disp && new_disp < 0x80)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return (-0x80 <= disp && disp < 0x80);
}

bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
    int evex_encoding = _attributes->get_evex_encoding();
    int tuple_type = _attributes->get_tuple_type();
    switch (tuple_type) {
    case EVEX_FV:
      if ((evex_encoding & VEX_W) == VEX_W) {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (_attributes->get_input_size()) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    int vector_len = _attributes->get_vector_len();
    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return is8bit(disp);
}
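
// Worked example (illustrative): for a scalar double memory operand
// (EVEX_T1S with EVEX_64bit input, so N = 8), disp = 512 is rewritten in
// place to 512 / 8 = 64 and encoded as a single disp8 byte. disp = 1024
// would give 128, which does not fit a signed byte, so disp is left
// unchanged and the caller falls back to a full disp32.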

static bool is_valid_encoding(int reg_enc) {
  return reg_enc >= 0;
}

static int raw_encode(Register reg) {
  assert(reg == noreg || reg->is_valid(), "sanity");
  int reg_enc = (intptr_t)reg;
  assert(reg_enc == -1 || is_valid_encoding(reg_enc), "sanity");
  return reg_enc;
}

static int raw_encode(XMMRegister xmmreg) {
  assert(xmmreg == xnoreg || xmmreg->is_valid(), "sanity");
  int xmmreg_enc = (intptr_t)xmmreg;
  assert(xmmreg_enc == -1 || is_valid_encoding(xmmreg_enc), "sanity");
  return xmmreg_enc;
}

static int modrm_encoding(int mod, int dst_enc, int src_enc) {
  return (mod & 3) << 6 | (dst_enc & 7) << 3 | (src_enc & 7);
}

static int sib_encoding(Address::ScaleFactor scale, int index_enc, int base_enc) {
  return (scale & 3) << 6 | (index_enc & 7) << 3 | (base_enc & 7);
}

inline void Assembler::emit_modrm(int mod, int dst_enc, int src_enc) {
  assert((mod & 3) != 0b11, "forbidden");
  int modrm = modrm_encoding(mod, dst_enc, src_enc);
  emit_int8(modrm);
}

inline void Assembler::emit_modrm_disp8(int mod, int dst_enc, int src_enc,
                                        int disp) {
  int modrm = modrm_encoding(mod, dst_enc, src_enc);
  emit_int16(modrm, disp & 0xFF);
}

inline void Assembler::emit_modrm_sib(int mod, int dst_enc, int src_enc,
                                      Address::ScaleFactor scale, int index_enc, int base_enc) {
  int modrm = modrm_encoding(mod, dst_enc, src_enc);
  int sib = sib_encoding(scale, index_enc, base_enc);
  emit_int16(modrm, sib);
}

inline void Assembler::emit_modrm_sib_disp8(int mod, int dst_enc, int src_enc,
                                            Address::ScaleFactor scale, int index_enc, int base_enc,
                                            int disp) {
  int modrm = modrm_encoding(mod, dst_enc, src_enc);
  int sib = sib_encoding(scale, index_enc, base_enc);
  emit_int24(modrm, sib, disp & 0xFF);
}
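
// Worked example (illustrative): encoding [rbx + rcx*4 + 16] for reg = rax
// calls emit_modrm_sib_disp8(0b01, 0, 0b100, Address::times_4, 1, 3, 16) and
// produces ModRM 0x44 ([01 000 100]), SIB 0x8B ([10 001 011]), disp8 0x10.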

void Assembler::emit_operand_helper(int reg_enc, int base_enc, int index_enc,
                                    Address::ScaleFactor scale, int disp,
                                    RelocationHolder const& rspec,
                                    int rip_relative_correction) {
  bool no_relocation = (rspec.type() == relocInfo::none);

  if (is_valid_encoding(base_enc)) {
    if (is_valid_encoding(index_enc)) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && no_relocation &&
          base_enc != rbp->encoding() LP64_ONLY(&& base_enc != r13->encoding())) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        emit_modrm_sib(0b00, reg_enc, 0b100,
                       scale, index_enc, base_enc);
      } else if (emit_compressed_disp_byte(disp) && no_relocation) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        emit_modrm_sib_disp8(0b01, reg_enc, 0b100,
                             scale, index_enc, base_enc,
                             disp);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        emit_modrm_sib(0b10, reg_enc, 0b100,
                       scale, index_enc, base_enc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base_enc == rsp->encoding() LP64_ONLY(|| base_enc == r12->encoding())) {
      // [rsp + disp]
      if (disp == 0 && no_relocation) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_modrm_sib(0b00, reg_enc, 0b100,
                       Address::times_1, 0b100, 0b100);
      } else if (emit_compressed_disp_byte(disp) && no_relocation) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_modrm_sib_disp8(0b01, reg_enc, 0b100,
                             Address::times_1, 0b100, 0b100,
                             disp);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_modrm_sib(0b10, reg_enc, 0b100,
                       Address::times_1, 0b100, 0b100);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base_enc != rsp->encoding() LP64_ONLY(&& base_enc != r12->encoding()), "illegal addressing mode");
      if (disp == 0 && no_relocation &&
          base_enc != rbp->encoding() LP64_ONLY(&& base_enc != r13->encoding())) {
        // [base]
        // [00 reg base]
        emit_modrm(0, reg_enc, base_enc);
      } else if (emit_compressed_disp_byte(disp) && no_relocation) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_modrm_disp8(0b01, reg_enc, base_enc,
                         disp);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_modrm(0b10, reg_enc, base_enc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (is_valid_encoding(index_enc)) {
      assert(scale != Address::no_scale, "inconsistent address");
      // base == noreg
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      emit_modrm_sib(0b00, reg_enc, 0b100,
                     scale, index_enc, 0b101 /* no base */);
      emit_data(disp, rspec, disp32_operand);
    } else if (!no_relocation) {
      // base == noreg, index == noreg
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 reg 101] disp32

      emit_modrm(0b00, reg_enc, 0b101 /* no base */);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // base == noreg, index == noreg, no_relocation == true
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_modrm_sib(0b00, reg_enc, 0b100 /* no base */,
                     Address::times_1, 0b100, 0b101);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
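
// Worked example (illustrative): suppose inst_mark() = 0x1000, the disp32
// field begins at pc() = 0x1002 and there is no trailing immediate, so
// next_ip = 0x1006. A caller that computed disp = target - inst_mark() for
// target 0x2000 passes disp = 0x1000, and the adjustment above emits
// 0x1000 - 6 = 0xFFA, i.e. target - next_ip, as RIP-relative addressing
// requires.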

void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  assert(!index->is_valid() || index != rsp, "illegal addressing mode");
  emit_operand_helper(raw_encode(reg), raw_encode(base), raw_encode(index),
                      scale, disp, rspec, rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister xmmreg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  assert(!index->is_valid() || index != rsp, "illegal addressing mode");
  assert(xmmreg->encoding() < 16 || UseAVX > 2, "not supported");
  emit_operand_helper(raw_encode(xmmreg), raw_encode(base), raw_encode(index),
                      scale, disp, rspec);
}

void Assembler::emit_operand(XMMRegister xmmreg, Register base, XMMRegister xmmindex,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  assert(xmmreg->encoding() < 16 || UseAVX > 2, "not supported");
  assert(xmmindex->encoding() < 16 || UseAVX > 2, "not supported");
  emit_operand_helper(raw_encode(xmmreg), raw_encode(base), raw_encode(xmmindex),
                      scale, disp, rspec, /* rip_relative_correction */ 0);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
      // fall through
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
      // fall through
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x58: // addpd
    case 0x59: // mulpd
    case 0x6E: // movd
    case 0x7E: // movd
    case 0x6F: // movdq
    case 0x7F: // movdq
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
    case 0xFE: // paddd
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
      // fall through
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
    // but they have prefix 0x0F and are processed when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    int vex_opcode;
    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      vex_opcode = VEX_OPCODE_MASK & *ip;
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    } else {
      vex_opcode = VEX_OPCODE_0F;
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (vex_opcode) {
      case VEX_OPCODE_0F:
        switch (0xFF & *ip) {
        case 0x70: // pshufd r, r/a, #8
        case 0x71: // ps[rl|ra|ll]w r, #8
        case 0x72: // ps[rl|ra|ll]d r, #8
        case 0x73: // ps[rl|ra|ll]q r, #8
        case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
        case 0xC4: // pinsrw r, r, r/a, #8
        case 0xC5: // pextrw r/a, r, #8
        case 0xC6: // shufp[s|d] r, r, r/a, #8
          tail_size = 1;  // the imm8
          break;
        }
        break;
      case VEX_OPCODE_0F_3A:
        tail_size = 1;
        break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert(VM_Version::supports_evex(), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions, all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x22: // pinsrd r, r/a, #8
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
    case 0x1f: // evpcmpd/evpcmpq
    case 0x3f: // evpcmpb/evpcmpw
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // fall through
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
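
// Usage sketch (illustrative): to step over an instruction or to patch its
// embedded 32-bit field --
//   address next = Assembler::locate_next_instruction(inst);
//   address disp = Assembler::locate_operand(inst, Assembler::disp32_operand);
// locate_operand asserts (in debug builds) that the instruction really has
// the requested operand, so callers must already know its shape.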


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  if (adr.isxmmindex()) {
    emit_operand(reg, adr._base, adr._xmmindex, adr._scale, adr._disp, adr._rspec);
  } else {
    emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
  }
}

// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}

void Assembler::addw(Register dst, Register src) {
  (void)prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addw(Address dst, int imm16) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, dst, 2);
  emit_int16(imm16);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int32(0x0F,
             0x1F,
             0x40, // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
             0);   // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int32(0x0F,
             0x1F,
             0x44,  // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
             0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);     // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int24(0x0F,
             0x1F,
             (unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int32(0x0F,
             0x1F,
             (unsigned char)0x84,
                    // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
             0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);    // 32-bits offset (4 bytes)
}
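
// These address-form NOPs emit one instruction of a chosen width (4, 5, 7 or
// 8 bytes), which pads code with something cheap to decode. A hypothetical
// call site (illustrative only): needing 7 bytes of padding to reach a
// 16-byte boundary, emit addr_nop_7() once rather than seven 0x90 NOPs.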
1291 
1292 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
1293   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1294   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1295   attributes.set_rex_vex_w_reverted();
1296   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1297   emit_int16(0x58, (0xC0 | encode));
1298 }
1299 
1300 void Assembler::addsd(XMMRegister dst, Address src) {
1301   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1302   InstructionMark im(this);
1303   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1304   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1305   attributes.set_rex_vex_w_reverted();
1306   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1307   emit_int8(0x58);
1308   emit_operand(dst, src);
1309 }
1310 
1311 void Assembler::addss(XMMRegister dst, XMMRegister src) {
1312   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1313   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1314   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1315   emit_int16(0x58, (0xC0 | encode));
1316 }
1317 
1318 void Assembler::addss(XMMRegister dst, Address src) {
1319   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1320   InstructionMark im(this);
1321   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1322   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1323   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1324   emit_int8(0x58);
1325   emit_operand(dst, src);
1326 }
1327 
1328 void Assembler::aesdec(XMMRegister dst, Address src) {
1329   assert(VM_Version::supports_aes(), "");
1330   InstructionMark im(this);
1331   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1332   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1333   emit_int8((unsigned char)0xDE);
1334   emit_operand(dst, src);
1335 }
1336 
1337 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1338   assert(VM_Version::supports_aes(), "");
1339   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1340   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1341   emit_int16((unsigned char)0xDE, (0xC0 | encode));
1342 }
1343 
1344 void Assembler::vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1345   assert(VM_Version::supports_avx512_vaes(), "");
1346   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1347   attributes.set_is_evex_instruction();
1348   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1349   emit_int16((unsigned char)0xDE, (0xC0 | encode));
1350 }
1351 
1352 
1353 void Assembler::aesdeclast(XMMRegister dst, Address src) {
1354   assert(VM_Version::supports_aes(), "");
1355   InstructionMark im(this);
1356   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1357   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1358   emit_int8((unsigned char)0xDF);
1359   emit_operand(dst, src);
1360 }
1361 
1362 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1363   assert(VM_Version::supports_aes(), "");
1364   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1365   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1366   emit_int16((unsigned char)0xDF, (0xC0 | encode));
1367 }
1368 
1369 void Assembler::vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1370   assert(VM_Version::supports_avx512_vaes(), "");
1371   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1372   attributes.set_is_evex_instruction();
1373   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1374   emit_int16((unsigned char)0xDF, (0xC0 | encode));
1375 }
1376 
1377 void Assembler::aesenc(XMMRegister dst, Address src) {
1378   assert(VM_Version::supports_aes(), "");
1379   InstructionMark im(this);
1380   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1381   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1382   emit_int8((unsigned char)0xDC);
1383   emit_operand(dst, src);
1384 }
1385 
1386 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1387   assert(VM_Version::supports_aes(), "");
1388   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1389   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1390   emit_int16((unsigned char)0xDC, 0xC0 | encode);
1391 }
1392 
1393 void Assembler::vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1394   assert(VM_Version::supports_avx512_vaes(), "requires vaes support/enabling");
1395   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1396   attributes.set_is_evex_instruction();
1397   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1398   emit_int16((unsigned char)0xDC, (0xC0 | encode));
1399 }
1400 
1401 void Assembler::aesenclast(XMMRegister dst, Address src) {
1402   assert(VM_Version::supports_aes(), "");
1403   InstructionMark im(this);
1404   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1405   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1406   emit_int8((unsigned char)0xDD);
1407   emit_operand(dst, src);
1408 }
1409 
1410 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1411   assert(VM_Version::supports_aes(), "");
1412   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1413   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1414   emit_int16((unsigned char)0xDD, (0xC0 | encode));
1415 }
1416 
1417 void Assembler::vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1418   assert(VM_Version::supports_avx512_vaes(), "requires vaes support/enabling");
1419   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1420   attributes.set_is_evex_instruction();
1421   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1422   emit_int16((unsigned char)0xDD, (0xC0 | encode));
1423 }
1424 
1425 void Assembler::andw(Register dst, Register src) {
1426   (void)prefix_and_encode(dst->encoding(), src->encoding());
1427   emit_arith(0x23, 0xC0, dst, src);
1428 }
1429 
1430 void Assembler::andl(Address dst, int32_t imm32) {
1431   InstructionMark im(this);
1432   prefix(dst);
1433   emit_int8((unsigned char)0x81);
1434   emit_operand(rsp, dst, 4);
1435   emit_int32(imm32);
1436 }
1437 
1438 void Assembler::andl(Register dst, int32_t imm32) {
1439   prefix(dst);
1440   emit_arith(0x81, 0xE0, dst, imm32);
1441 }
1442 
1443 void Assembler::andl(Register dst, Address src) {
1444   InstructionMark im(this);
1445   prefix(src, dst);
1446   emit_int8(0x23);
1447   emit_operand(dst, src);
1448 }
1449 
1450 void Assembler::andl(Register dst, Register src) {
1451   (void) prefix_and_encode(dst->encoding(), src->encoding());
1452   emit_arith(0x23, 0xC0, dst, src);
1453 }
1454 
1455 void Assembler::andnl(Register dst, Register src1, Register src2) {
1456   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1457   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1458   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1459   emit_int16((unsigned char)0xF2, (0xC0 | encode));
1460 }
1461 
1462 void Assembler::andnl(Register dst, Register src1, Address src2) {
1463   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1464   InstructionMark im(this);
1465   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1466   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1467   emit_int8((unsigned char)0xF2);
1468   emit_operand(dst, src2);
1469 }
1470 
1471 void Assembler::bsfl(Register dst, Register src) {
1472   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1473   emit_int24(0x0F,
1474              (unsigned char)0xBC,
1475              0xC0 | encode);
1476 }
1477 
1478 void Assembler::bsrl(Register dst, Register src) {
1479   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1480   emit_int24(0x0F,
1481              (unsigned char)0xBD,
1482              0xC0 | encode);
1483 }
1484 
1485 void Assembler::bswapl(Register reg) { // bswap
1486   int encode = prefix_and_encode(reg->encoding());
1487   emit_int16(0x0F, (0xC8 | encode));
1488 }
1489 
1490 void Assembler::blsil(Register dst, Register src) {
1491   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1492   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1493   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1494   emit_int16((unsigned char)0xF3, (0xC0 | encode));
1495 }
1496 
1497 void Assembler::blsil(Register dst, Address src) {
1498   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1499   InstructionMark im(this);
1500   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1501   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1502   emit_int8((unsigned char)0xF3);
1503   emit_operand(rbx, src);
1504 }
1505 
1506 void Assembler::blsmskl(Register dst, Register src) {
1507   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1508   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1509   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1510   emit_int16((unsigned char)0xF3,
1511              0xC0 | encode);
1512 }
1513 
1514 void Assembler::blsmskl(Register dst, Address src) {
1515   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1516   InstructionMark im(this);
1517   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1518   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1519   emit_int8((unsigned char)0xF3);
1520   emit_operand(rdx, src);
1521 }
1522 
1523 void Assembler::blsrl(Register dst, Register src) {
1524   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1525   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1526   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1527   emit_int16((unsigned char)0xF3, (0xC0 | encode));
1528 }
1529 
1530 void Assembler::blsrl(Register dst, Address src) {
1531   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1532   InstructionMark im(this);
1533   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1534   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1535   emit_int8((unsigned char)0xF3);
1536   emit_operand(rcx, src);
1537 }
1538 
1539 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1540   // suspect disp32 is always good
1541   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1542 
1543   if (L.is_bound()) {
1544     const int long_size = 5;
1545     int offs = (int)( target(L) - pc() );
1546     assert(offs <= 0, "assembler error");
1547     InstructionMark im(this);
1548     // 1110 1000 #32-bit disp
1549     emit_int8((unsigned char)0xE8);
1550     emit_data(offs - long_size, rtype, operand);
1551   } else {
1552     InstructionMark im(this);
1553     // 1110 1000 #32-bit disp
1554     L.add_patch_at(code(), locator());
1555 
1556     emit_int8((unsigned char)0xE8);
1557     emit_data(int(0), rtype, operand);
1558   }
1559 }
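
// Usage sketch (illustrative): a call against a not-yet-bound label emits
// E8 00 00 00 00 and records the site via add_patch_at(); a later bind(L)
// back-patches the 32-bit displacement in place:
//   Label L;
//   call(L, relocInfo::none);   // forward call, displacement patched later
//   ...
//   bind(L);                    // fixes up the call site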
1560 
1561 void Assembler::call(Register dst) {
1562   int encode = prefix_and_encode(dst->encoding());
1563   emit_int16((unsigned char)0xFF, (0xD0 | encode));
1564 }
1565 
1566 
1567 void Assembler::call(Address adr) {
1568   InstructionMark im(this);
1569   prefix(adr);
1570   emit_int8((unsigned char)0xFF);
1571   emit_operand(rdx, adr);
1572 }
1573 
1574 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1575   InstructionMark im(this);
1576   emit_int8((unsigned char)0xE8);
1577   intptr_t disp = entry - (pc() + sizeof(int32_t));
1578   // Entry is NULL in case of a scratch emit.
1579   assert(entry == NULL || is_simm32(disp), "disp=" INTPTR_FORMAT " must be 32bit offset (call2)", disp);
1580   // Technically, should use call32_operand, but this format is
1581   // implied by the fact that we're emitting a call instruction.
1582 
1583   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1584   emit_data((int) disp, rspec, operand);
1585 }
1586 
1587 void Assembler::cdql() {
1588   emit_int8((unsigned char)0x99);
1589 }
1590 
1591 void Assembler::cld() {
1592   emit_int8((unsigned char)0xFC);
1593 }
1594 
1595 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1596   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1597   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1598   emit_int24(0x0F,
1599              0x40 | cc,
1600              0xC0 | encode);
1601 }
1602 
1603 
1604 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1605   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1606   prefix(src, dst);
1607   emit_int16(0x0F, (0x40 | cc));
1608   emit_operand(dst, src);
1609 }
1610 
1611 void Assembler::cmpb(Address dst, int imm8) {
1612   InstructionMark im(this);
1613   prefix(dst);
1614   emit_int8((unsigned char)0x80);
1615   emit_operand(rdi, dst, 1);
1616   emit_int8(imm8);
1617 }
1618 
1619 void Assembler::cmpl(Address dst, int32_t imm32) {
1620   InstructionMark im(this);
1621   prefix(dst);
1622   emit_int8((unsigned char)0x81);
1623   emit_operand(rdi, dst, 4);
1624   emit_int32(imm32);
1625 }
1626 
1627 void Assembler::cmpl(Register dst, int32_t imm32) {
1628   prefix(dst);
1629   emit_arith(0x81, 0xF8, dst, imm32);
1630 }
1631 
1632 void Assembler::cmpl(Register dst, Register src) {
1633   (void) prefix_and_encode(dst->encoding(), src->encoding());
1634   emit_arith(0x3B, 0xC0, dst, src);
1635 }
1636 
1637 void Assembler::cmpl(Register dst, Address  src) {
1638   InstructionMark im(this);
1639   prefix(src, dst);
1640   emit_int8(0x3B);
1641   emit_operand(dst, src);
1642 }
1643 
1644 void Assembler::cmpw(Address dst, int imm16) {
1645   InstructionMark im(this);
1646   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1647   emit_int16(0x66, (unsigned char)0x81);
1648   emit_operand(rdi, dst, 2);
1649   emit_int16(imm16);
1650 }
1651 
1652 // The 32-bit cmpxchg compares the value at adr with the contents of rax;
1653 // if they are equal, reg is stored into adr, otherwise the value at adr is loaded into rax.
1654 // The ZF is set if the compared values were equal, and cleared otherwise.
1655 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1656   InstructionMark im(this);
1657   prefix(adr, reg);
1658   emit_int16(0x0F, (unsigned char)0xB1);
1659   emit_operand(reg, adr);
1660 }
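
// Usage sketch (illustrative, with hypothetical registers newv and addr): an
// atomic compare-and-swap pairs the LOCK prefix with cmpxchg; rax holds the
// expected value, newv the replacement:
//   lock();
//   cmpxchgl(newv, Address(addr, 0));
//   // ZF now indicates success; branch on it with jcc(Assembler::equal, ...)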
1661 
1662 // The 8-bit cmpxchg compares the value at adr with the low byte of rax (al);
1663 // if they are equal, reg is stored into adr, otherwise the value at adr is loaded into al.
1664 // The ZF is set if the compared values were equal, and cleared otherwise.
1665 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1666   InstructionMark im(this);
1667   prefix(adr, reg, true);
1668   emit_int16(0x0F, (unsigned char)0xB0);
1669   emit_operand(reg, adr);
1670 }
1671 
1672 void Assembler::comisd(XMMRegister dst, Address src) {
1673   // NOTE: dbx seems to decode this as comiss even though the
1674   // 0x66 is there. Strangely, ucomisd comes out correct.
1675   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1676   InstructionMark im(this);
1677   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1678   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1679   attributes.set_rex_vex_w_reverted();
1680   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1681   emit_int8(0x2F);
1682   emit_operand(dst, src);
1683 }
1684 
1685 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1686   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1687   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1688   attributes.set_rex_vex_w_reverted();
1689   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1690   emit_int16(0x2F, (0xC0 | encode));
1691 }
1692 
1693 void Assembler::comiss(XMMRegister dst, Address src) {
1694   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1695   InstructionMark im(this);
1696   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1697   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1698   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1699   emit_int8(0x2F);
1700   emit_operand(dst, src);
1701 }
1702 
1703 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1704   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1705   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1706   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1707   emit_int16(0x2F, (0xC0 | encode));
1708 }
1709 
1710 void Assembler::cpuid() {
1711   emit_int16(0x0F, (unsigned char)0xA2);
1712 }
1713 
1714 // Opcode / Instruction                      Op /  En  64 - Bit Mode     Compat / Leg Mode Description                  Implemented
1715 // F2 0F 38 F0 / r       CRC32 r32, r / m8   RM        Valid             Valid             Accumulate CRC32 on r / m8.  v
1716 // F2 REX 0F 38 F0 / r   CRC32 r32, r / m8*  RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1717 // F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8   RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1718 //
1719 // F2 0F 38 F1 / r       CRC32 r32, r / m16  RM        Valid             Valid             Accumulate CRC32 on r / m16. v
1720 //
1721 // F2 0F 38 F1 / r       CRC32 r32, r / m32  RM        Valid             Valid             Accumulate CRC32 on r / m32. v
1722 //
1723 // F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64  RM        Valid             N.E.              Accumulate CRC32 on r / m64. v
1724 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1725   assert(VM_Version::supports_sse4_2(), "");
1726   int8_t w = 0x01;
1727   Prefix p = Prefix_EMPTY;
1728 
1729   emit_int8((unsigned char)0xF2);
1730   switch (sizeInBytes) {
1731   case 1:
1732     w = 0;
1733     break;
1734   case 2:
1735   case 4:
1736     break;
1737   LP64_ONLY(case 8:)
1738     // This instruction is not valid in 32-bit mode
1739     // Note:
1740     // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
1741     //
1742     // Page B-72, Vol. 2C says
1743     // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
1744     // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
1745     //                                                                            F0!!!
1746     // while page 3-208 of Vol. 2A says
1747     // F2 REX.W 0F 38 F1 / r       CRC32 r64, r / m64             RM         Valid      N.E.  Accumulate CRC32 on r / m64.
1748     //
1749     // the 0 in the last bit is reserved for a different flavor of this instruction:
1750     // F2 REX.W 0F 38 F0 / r       CRC32 r64, r / m8              RM         Valid      N.E.  Accumulate CRC32 on r / m8.
1751     p = REX_W;
1752     break;
1753   default:
1754     assert(0, "Unsupported value for a sizeInBytes argument");
1755     break;
1756   }
1757   LP64_ONLY(prefix(crc, v, p);)
1758   emit_int32(0x0F,
1759              0x38,
1760              0xF0 | w,
1761              0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
1762 }
1763 
1764 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1765   assert(VM_Version::supports_sse4_2(), "");
1766   InstructionMark im(this);
1767   int8_t w = 0x01;
1768   Prefix p = Prefix_EMPTY;
1769 
1770   emit_int8((int8_t)0xF2);
1771   switch (sizeInBytes) {
1772   case 1:
1773     w = 0;
1774     break;
1775   case 2:
1776   case 4:
1777     break;
1778   LP64_ONLY(case 8:)
1779     // This instruction is not valid in 32-bit mode
1780     p = REX_W;
1781     break;
1782   default:
1783     assert(0, "Unsupported value for a sizeInBytes argument");
1784     break;
1785   }
1786   LP64_ONLY(prefix(crc, adr, p);)
1787   emit_int24(0x0F, 0x38, (0xF0 | w));
1788   emit_operand(crc, adr);
1789 }
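
// Usage sketch (illustrative, with hypothetical registers crc, val and buf):
// accumulating one CRC32 value across mixed operand widths, with crc holding
// the running checksum:
//   crc32(crc, val, 1);              // F2 0F 38 F0: CRC32 r32, r/m8
//   crc32(crc, Address(buf, 0), 4);  // F2 0F 38 F1: CRC32 r32, r/m32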
1790 
1791 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1792   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1793   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1794   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1795   emit_int16((unsigned char)0xE6, (0xC0 | encode));
1796 }
1797 
1798 void Assembler::vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
1799   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
1800   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1801   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1802   emit_int16((unsigned char)0xE6, (0xC0 | encode));
1803 }
1804 
1805 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1806   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1807   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1808   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1809   emit_int16(0x5B, (0xC0 | encode));
1810 }
1811 
1812 void Assembler::vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
1813   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
1814   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1815   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1816   emit_int16(0x5B, (0xC0 | encode));
1817 }
1818 
1819 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1820   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1821   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1822   attributes.set_rex_vex_w_reverted();
1823   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1824   emit_int16(0x5A, (0xC0 | encode));
1825 }
1826 
1827 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1828   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1829   InstructionMark im(this);
1830   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1831   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1832   attributes.set_rex_vex_w_reverted();
1833   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1834   emit_int8(0x5A);
1835   emit_operand(dst, src);
1836 }
1837 
1838 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1839   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1840   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1841   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1842   emit_int16(0x2A, (0xC0 | encode));
1843 }
1844 
1845 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1846   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1847   InstructionMark im(this);
1848   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1849   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1850   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1851   emit_int8(0x2A);
1852   emit_operand(dst, src);
1853 }
1854 
1855 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1856   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1857   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1858   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1859   emit_int16(0x2A, (0xC0 | encode));
1860 }
1861 
1862 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1863   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1864   InstructionMark im(this);
1865   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1866   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1867   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1868   emit_int8(0x2A);
1869   emit_operand(dst, src);
1870 }
1871 
1872 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1873   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1874   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1875   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1876   emit_int16(0x2A, (0xC0 | encode));
1877 }
1878 
1879 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1880   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1881   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1882   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1883   emit_int16(0x5A, (0xC0 | encode));
1884 }
1885 
1886 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1887   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1888   InstructionMark im(this);
1889   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1890   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1891   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1892   emit_int8(0x5A);
1893   emit_operand(dst, src);
1894 }
1895 
1896 
1897 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1898   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1899   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1900   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1901   emit_int16(0x2C, (0xC0 | encode));
1902 }
1903 
1904 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1905   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1906   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1907   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1908   emit_int16(0x2C, (0xC0 | encode));
1909 }
1910 
1911 void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
1912   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1913   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
1914   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1915   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1916   emit_int16((unsigned char)0xE6, (0xC0 | encode));
1917 }
1918 
1919 void Assembler::pabsb(XMMRegister dst, XMMRegister src) {
1920   assert(VM_Version::supports_ssse3(), "");
1921   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
1922   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1923   emit_int16(0x1C, (0xC0 | encode));
1924 }
1925 
1926 void Assembler::pabsw(XMMRegister dst, XMMRegister src) {
1927   assert(VM_Version::supports_ssse3(), "");
1928   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
1929   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1930   emit_int16(0x1D, (0xC0 | encode));
1931 }
1932 
1933 void Assembler::pabsd(XMMRegister dst, XMMRegister src) {
1934   assert(VM_Version::supports_ssse3(), "");
1935   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1936   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1937   emit_int16(0x1E, (0xC0 | encode));
1938 }
1939 
1940 void Assembler::vpabsb(XMMRegister dst, XMMRegister src, int vector_len) {
1941   assert(vector_len == AVX_128bit ? VM_Version::supports_avx()      :
1942          vector_len == AVX_256bit ? VM_Version::supports_avx2()     :
1943          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, "not supported");
1944   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
1945   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1946   emit_int16(0x1C, (0xC0 | encode));
1947 }
1948 
1949 void Assembler::vpabsw(XMMRegister dst, XMMRegister src, int vector_len) {
1950   assert(vector_len == AVX_128bit ? VM_Version::supports_avx()      :
1951          vector_len == AVX_256bit ? VM_Version::supports_avx2()     :
1952          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, "");
1953   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
1954   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1955   emit_int16(0x1D, (0xC0 | encode));
1956 }
1957 
1958 void Assembler::vpabsd(XMMRegister dst, XMMRegister src, int vector_len) {
1959   assert(vector_len == AVX_128bit ? VM_Version::supports_avx()   :
1960          vector_len == AVX_256bit ? VM_Version::supports_avx2()  :
1961          vector_len == AVX_512bit ? VM_Version::supports_evex() : false, "");
1962   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1963   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1964   emit_int16(0x1E, (0xC0 | encode));
1965 }
1966 
1967 void Assembler::evpabsq(XMMRegister dst, XMMRegister src, int vector_len) {
1968   assert(UseAVX > 2, "");
1969   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1970   attributes.set_is_evex_instruction();
1971   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1972   emit_int16(0x1F, (0xC0 | encode));
1973 }
1974 
1975 void Assembler::vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len) {
1976   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
1977   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1978   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1979   emit_int16(0x5A, (0xC0 | encode));
1980 }
1981 
1982 void Assembler::vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len) {
1983   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
1984   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1985   attributes.set_rex_vex_w_reverted();
1986   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1987   emit_int16(0x5A, (0xC0 | encode));
1988 }
1989 
1990 void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
1991   assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
1992   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1993   attributes.set_is_evex_instruction();
1994   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1995   emit_int16(0x5B, (0xC0 | encode));
1996 }
1997 
1998 void Assembler::evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
1999   assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
2000   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2001   attributes.set_is_evex_instruction();
2002   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2003   emit_int16((unsigned char)0xE6, (0xC0 | encode));
2004 }
2005 
2006 void Assembler::evpmovwb(XMMRegister dst, XMMRegister src, int vector_len) {
2007   assert(UseAVX > 2  && VM_Version::supports_avx512bw(), "");
2008   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2009   attributes.set_is_evex_instruction();
2010   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2011   emit_int16(0x30, (0xC0 | encode));
2012 }
2013 
2014 void Assembler::evpmovdw(XMMRegister dst, XMMRegister src, int vector_len) {
2015   assert(UseAVX > 2, "");
2016   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2017   attributes.set_is_evex_instruction();
2018   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2019   emit_int16(0x33, (0xC0 | encode));
2020 }
2021 
2022 void Assembler::evpmovdb(XMMRegister dst, XMMRegister src, int vector_len) {
2023   assert(UseAVX > 2, "");
2024   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2025   attributes.set_is_evex_instruction();
2026   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2027   emit_int16(0x31, (0xC0 | encode));
2028 }
2029 
2030 void Assembler::evpmovqd(XMMRegister dst, XMMRegister src, int vector_len) {
2031   assert(UseAVX > 2, "");
2032   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2033   attributes.set_is_evex_instruction();
2034   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2035   emit_int16(0x35, (0xC0 | encode));
2036 }
2037 
2038 void Assembler::evpmovqb(XMMRegister dst, XMMRegister src, int vector_len) {
2039   assert(UseAVX > 2, "");
2040   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2041   attributes.set_is_evex_instruction();
2042   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2043   emit_int16(0x32, (0xC0 | encode));
2044 }
2045 
2046 void Assembler::evpmovqw(XMMRegister dst, XMMRegister src, int vector_len) {
2047   assert(UseAVX > 2, "");
2048   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2049   attributes.set_is_evex_instruction();
2050   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2051   emit_int16(0x34, (0xC0 | encode));
2052 }
2053 
2054 void Assembler::decl(Address dst) {
2055   // Don't use it directly. Use MacroAssembler::decrement() instead.
2056   InstructionMark im(this);
2057   prefix(dst);
2058   emit_int8((unsigned char)0xFF);
2059   emit_operand(rcx, dst);
2060 }
2061 
2062 void Assembler::divsd(XMMRegister dst, Address src) {
2063   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2064   InstructionMark im(this);
2065   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2066   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2067   attributes.set_rex_vex_w_reverted();
2068   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2069   emit_int8(0x5E);
2070   emit_operand(dst, src);
2071 }
2072 
2073 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
2074   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2075   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2076   attributes.set_rex_vex_w_reverted();
2077   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2078   emit_int16(0x5E, (0xC0 | encode));
2079 }
2080 
2081 void Assembler::divss(XMMRegister dst, Address src) {
2082   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2083   InstructionMark im(this);
2084   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2085   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2086   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2087   emit_int8(0x5E);
2088   emit_operand(dst, src);
2089 }
2090 
2091 void Assembler::divss(XMMRegister dst, XMMRegister src) {
2092   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2093   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2094   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2095   emit_int16(0x5E, (0xC0 | encode));
2096 }
2097 
2098 void Assembler::hlt() {
2099   emit_int8((unsigned char)0xF4);
2100 }
2101 
2102 void Assembler::idivl(Register src) {
2103   int encode = prefix_and_encode(src->encoding());
2104   emit_int16((unsigned char)0xF7, (0xF8 | encode));
2105 }
2106 
2107 void Assembler::divl(Register src) { // Unsigned
2108   int encode = prefix_and_encode(src->encoding());
2109   emit_int16((unsigned char)0xF7, (0xF0 | encode));
2110 }
2111 
2112 void Assembler::imull(Register src) {
2113   int encode = prefix_and_encode(src->encoding());
2114   emit_int16((unsigned char)0xF7, (0xE8 | encode));
2115 }
2116 
2117 void Assembler::imull(Register dst, Register src) {
2118   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2119   emit_int24(0x0F,
2120              (unsigned char)0xAF,
2121              (0xC0 | encode));
2122 }
2123 
2124 
2125 void Assembler::imull(Register dst, Register src, int value) {
2126   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2127   if (is8bit(value)) {
2128     emit_int24(0x6B, (0xC0 | encode), value & 0xFF);
2129   } else {
2130     emit_int16(0x69, (0xC0 | encode));
2131     emit_int32(value);
2132   }
2133 }
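
// The two immediate encodings above trade length for range: 0x6B takes a
// sign-extended 8-bit immediate while 0x69 takes a full 32-bit one. For
// example, imull(rdx, rcx, 16) fits the short form, but imull(rdx, rcx, 4096)
// requires the long form.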
2134 
2135 void Assembler::imull(Register dst, Address src) {
2136   InstructionMark im(this);
2137   prefix(src, dst);
2138   emit_int16(0x0F, (unsigned char)0xAF);
2139   emit_operand(dst, src);
2140 }
2141 
2142 
2143 void Assembler::incl(Address dst) {
2144   // Don't use it directly. Use MacroAssembler::increment() instead.
2145   InstructionMark im(this);
2146   prefix(dst);
2147   emit_int8((unsigned char)0xFF);
2148   emit_operand(rax, dst);
2149 }
2150 
2151 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
2152   InstructionMark im(this);
2153   assert((0 <= cc) && (cc < 16), "illegal cc");
2154   if (L.is_bound()) {
2155     address dst = target(L);
2156     assert(dst != NULL, "jcc most probably wrong");
2157 
2158     const int short_size = 2;
2159     const int long_size = 6;
2160     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
2161     if (maybe_short && is8bit(offs - short_size)) {
2162       // 0111 tttn #8-bit disp
2163       emit_int16(0x70 | cc, (offs - short_size) & 0xFF);
2164     } else {
2165       // 0000 1111 1000 tttn #32-bit disp
2166       assert(is_simm32(offs - long_size),
2167              "must be 32bit offset (call4)");
2168       emit_int16(0x0F, (0x80 | cc));
2169       emit_int32(offs - long_size);
2170     }
2171   } else {
2172     // Note: we could eliminate conditional jumps to this jump if the
2173     //       condition is the same; however, that seems to be a rather unlikely case.
2174     // Note: use jccb() if the label to be bound is very close, to get
2175     //       an 8-bit displacement.
2176     L.add_patch_at(code(), locator());
2177     emit_int16(0x0F, (0x80 | cc));
2178     emit_int32(0);
2179   }
2180 }
2181 
2182 void Assembler::jccb_0(Condition cc, Label& L, const char* file, int line) {
2183   if (L.is_bound()) {
2184     const int short_size = 2;
2185     address entry = target(L);
2186 #ifdef ASSERT
2187     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2188     intptr_t delta = short_branch_delta();
2189     if (delta != 0) {
2190       dist += (dist < 0 ? (-delta) : delta);
2191     }
2192     assert(is8bit(dist), "Displacement too large for a short jmp at %s:%d", file, line);
2193 #endif
2194     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2195     // 0111 tttn #8-bit disp
2196     emit_int16(0x70 | cc, (offs - short_size) & 0xFF);
2197   } else {
2198     InstructionMark im(this);
2199     L.add_patch_at(code(), locator(), file, line);
2200     emit_int16(0x70 | cc, 0);
2201   }
2202 }
2203 
2204 void Assembler::jmp(Address adr) {
2205   InstructionMark im(this);
2206   prefix(adr);
2207   emit_int8((unsigned char)0xFF);
2208   emit_operand(rsp, adr);
2209 }
2210 
2211 void Assembler::jmp(Label& L, bool maybe_short) {
2212   if (L.is_bound()) {
2213     address entry = target(L);
2214     assert(entry != NULL, "jmp most probably wrong");
2215     InstructionMark im(this);
2216     const int short_size = 2;
2217     const int long_size = 5;
2218     intptr_t offs = entry - pc();
2219     if (maybe_short && is8bit(offs - short_size)) {
2220       emit_int16((unsigned char)0xEB, ((offs - short_size) & 0xFF));
2221     } else {
2222       emit_int8((unsigned char)0xE9);
2223       emit_int32(offs - long_size);
2224     }
2225   } else {
2226     // By default, forward jumps are always 32-bit displacements, since
2227     // we can't yet know where the label will be bound.  If you're sure that
2228     // the forward jump will not run beyond 256 bytes, use jmpb to
2229     // force an 8-bit displacement.
2230     InstructionMark im(this);
2231     L.add_patch_at(code(), locator());
2232     emit_int8((unsigned char)0xE9);
2233     emit_int32(0);
2234   }
2235 }
2236 
2237 void Assembler::jmp(Register entry) {
2238   int encode = prefix_and_encode(entry->encoding());
2239   emit_int16((unsigned char)0xFF, (0xE0 | encode));
2240 }
2241 
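// The displacement of a direct jump is relative to the end of the 5-byte
// instruction. Below, the 0xE9 opcode is emitted first, so pc() plus the four
// displacement bytes marks that end: disp = dest - (pc() + sizeof(int32_t)).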
2242 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2243   InstructionMark im(this);
2244   emit_int8((unsigned char)0xE9);
2245   assert(dest != NULL, "must have a target");
2246   intptr_t disp = dest - (pc() + sizeof(int32_t));
2247   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2248   emit_data(disp, rspec.reloc(), call32_operand);
2249 }
2250 
2251 void Assembler::jmpb_0(Label& L, const char* file, int line) {
2252   if (L.is_bound()) {
2253     const int short_size = 2;
2254     address entry = target(L);
2255     assert(entry != NULL, "jmp most probably wrong");
2256 #ifdef ASSERT
2257     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2258     intptr_t delta = short_branch_delta();
2259     if (delta != 0) {
2260       dist += (dist < 0 ? (-delta) : delta);
2261     }
2262     assert(is8bit(dist), "Displacement too large for a short jmp at %s:%d", file, line);
2263 #endif
2264     intptr_t offs = entry - pc();
2265     emit_int16((unsigned char)0xEB, (offs - short_size) & 0xFF);
2266   } else {
2267     InstructionMark im(this);
2268     L.add_patch_at(code(), locator(), file, line);
2269     emit_int16((unsigned char)0xEB, 0);
2270   }
2271 }
2272 
2273 void Assembler::ldmxcsr( Address src) {
2274   if (UseAVX > 0) {
2275     InstructionMark im(this);
2276     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2277     vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2278     emit_int8((unsigned char)0xAE);
2279     emit_operand(as_Register(2), src);
2280   } else {
2281     NOT_LP64(assert(VM_Version::supports_sse(), ""));
2282     InstructionMark im(this);
2283     prefix(src);
2284     emit_int16(0x0F, (unsigned char)0xAE);
2285     emit_operand(as_Register(2), src);
2286   }
2287 }
2288 
2289 void Assembler::leal(Register dst, Address src) {
2290   InstructionMark im(this);
2291 #ifdef _LP64
2292   emit_int8(0x67); // addr32
2293   prefix(src, dst);
2294 #endif // LP64
2295   emit_int8((unsigned char)0x8D);
2296   emit_operand(dst, src);
2297 }
2298 
2299 void Assembler::lfence() {
2300   emit_int24(0x0F, (unsigned char)0xAE, (unsigned char)0xE8);
2301 }
2302 
2303 void Assembler::lock() {
2304   emit_int8((unsigned char)0xF0);
2305 }
2306 
2307 void Assembler::lzcntl(Register dst, Register src) {
2308   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2309   emit_int8((unsigned char)0xF3);
2310   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2311   emit_int24(0x0F, (unsigned char)0xBD, (0xC0 | encode));
2312 }
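
// Note: lzcnt is encoded as an F3-prefixed bsr (F3 0F BD). On CPUs without
// lzcnt support the F3 prefix is ignored and the bytes execute as plain bsr,
// which computes a different result; hence the assert above.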
2313 
2314 // Emit mfence instruction
2315 void Assembler::mfence() {
2316   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2317   emit_int24(0x0F, (unsigned char)0xAE, (unsigned char)0xF0);
2318 }
2319 
2320 // Emit sfence instruction
2321 void Assembler::sfence() {
2322   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2323   emit_int24(0x0F, (unsigned char)0xAE, (unsigned char)0xF8);
2324 }
2325 
2326 void Assembler::mov(Register dst, Register src) {
2327   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2328 }
2329 
2330 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2331   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2332   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2333   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2334   attributes.set_rex_vex_w_reverted();
2335   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2336   emit_int16(0x28, (0xC0 | encode));
2337 }
2338 
2339 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2340   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2341   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2342   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2343   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2344   emit_int16(0x28, (0xC0 | encode));
2345 }
2346 
2347 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2348   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2349   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2350   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2351   emit_int16(0x16, (0xC0 | encode));
2352 }
2353 
2354 void Assembler::movb(Register dst, Address src) {
2355   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2356   InstructionMark im(this);
2357   prefix(src, dst, true);
2358   emit_int8((unsigned char)0x8A);
2359   emit_operand(dst, src);
2360 }
2361 
2362 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2363   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2364   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2365   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2366   attributes.set_rex_vex_w_reverted();
2367   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2368   emit_int16(0x12, 0xC0 | encode);
2369 }
2370 
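// The kmov family below follows a single pattern in the 0F opcode map:
// 0x90 is mask <- mask/mem, 0x91 (unused here) is mem <- mask, 0x92 is
// mask <- GPR and 0x93 is GPR <- mask; the SIMD prefix and VEX.W together
// select the b/w/d/q operand width.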
2371 void Assembler::kmovbl(KRegister dst, Register src) {
2372   assert(VM_Version::supports_avx512dq(), "");
2373   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2374   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2375   emit_int16((unsigned char)0x92, (0xC0 | encode));
2376 }
2377 
2378 void Assembler::kmovbl(Register dst, KRegister src) {
2379   assert(VM_Version::supports_avx512dq(), "");
2380   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2381   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2382   emit_int16((unsigned char)0x93, (0xC0 | encode));
2383 }
2384 
2385 void Assembler::kmovwl(KRegister dst, Register src) {
2386   assert(VM_Version::supports_evex(), "");
2387   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2388   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2389   emit_int16((unsigned char)0x92, (0xC0 | encode));
2390 }
2391 
2392 void Assembler::kmovwl(Register dst, KRegister src) {
2393   assert(VM_Version::supports_evex(), "");
2394   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2395   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2396   emit_int16((unsigned char)0x93, (0xC0 | encode));
2397 }
2398 
2399 void Assembler::kmovwl(KRegister dst, Address src) {
2400   assert(VM_Version::supports_evex(), "");
2401   InstructionMark im(this);
2402   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2403   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2404   emit_int8((unsigned char)0x90);
2405   emit_operand((Register)dst, src);
2406 }
2407 
2408 void Assembler::kmovdl(KRegister dst, Register src) {
2409   assert(VM_Version::supports_avx512bw(), "");
2410   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2411   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2412   emit_int16((unsigned char)0x92, (0xC0 | encode));
2413 }
2414 
2415 void Assembler::kmovdl(Register dst, KRegister src) {
2416   assert(VM_Version::supports_avx512bw(), "");
2417   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2418   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2419   emit_int16((unsigned char)0x93, (0xC0 | encode));
2420 }
2421 
2422 void Assembler::kmovql(KRegister dst, KRegister src) {
2423   assert(VM_Version::supports_avx512bw(), "");
2424   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2425   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2426   emit_int16((unsigned char)0x90, (0xC0 | encode));
2427 }
2428 
2429 void Assembler::kmovql(KRegister dst, Address src) {
2430   assert(VM_Version::supports_avx512bw(), "");
2431   InstructionMark im(this);
2432   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2433   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2434   emit_int8((unsigned char)0x90);
2435   emit_operand((Register)dst, src);
2436 }
2437 
2438 void Assembler::kmovql(Address dst, KRegister src) {
2439   assert(VM_Version::supports_avx512bw(), "");
2440   InstructionMark im(this);
2441   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2442   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2443   emit_int8((unsigned char)0x90);
2444   emit_operand((Register)src, dst);
2445 }
2446 
2447 void Assembler::kmovql(KRegister dst, Register src) {
2448   assert(VM_Version::supports_avx512bw(), "");
2449   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2450   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2451   emit_int16((unsigned char)0x92, (0xC0 | encode));
2452 }
2453 
2454 void Assembler::kmovql(Register dst, KRegister src) {
2455   assert(VM_Version::supports_avx512bw(), "");
2456   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2457   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2458   emit_int16((unsigned char)0x93, (0xC0 | encode));
2459 }
2460 
2461 void Assembler::knotwl(KRegister dst, KRegister src) {
2462   assert(VM_Version::supports_evex(), "");
2463   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2464   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2465   emit_int16(0x44, (0xC0 | encode));
2466 }
2467 
2468 // This instruction produces ZF or CF flags: ZF if (src1 | src2) is all zeroes, CF if it is all ones
2469 void Assembler::kortestbl(KRegister src1, KRegister src2) {
2470   assert(VM_Version::supports_avx512dq(), "");
2471   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2472   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2473   emit_int16((unsigned char)0x98, (0xC0 | encode));
2474 }
2475 
2476 // This instruction produces ZF or CF flags: ZF if (src1 | src2) is all zeroes, CF if it is all ones
2477 void Assembler::kortestwl(KRegister src1, KRegister src2) {
2478   assert(VM_Version::supports_evex(), "");
2479   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2480   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2481   emit_int16((unsigned char)0x98, (0xC0 | encode));
2482 }
2483 
2484 // This instruction produces ZF or CF flags: ZF if (src1 | src2) is all zeroes, CF if it is all ones
2485 void Assembler::kortestdl(KRegister src1, KRegister src2) {
2486   assert(VM_Version::supports_avx512bw(), "");
2487   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2488   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2489   emit_int16((unsigned char)0x98, (0xC0 | encode));
2490 }
2491 
2492 // This instruction produces ZF or CF flags: ZF if (src1 | src2) is all zeroes, CF if it is all ones
2493 void Assembler::kortestql(KRegister src1, KRegister src2) {
2494   assert(VM_Version::supports_avx512bw(), "");
2495   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2496   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2497   emit_int16((unsigned char)0x98, (0xC0 | encode));
2498 }
2499 
2500 // This instruction produces ZF or CF flags
2501 void Assembler::ktestql(KRegister src1, KRegister src2) {
2502   assert(VM_Version::supports_avx512bw(), "");
2503   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2504   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2505   emit_int16((unsigned char)0x99, (0xC0 | encode));
2506 }
2507 
2508 void Assembler::ktestq(KRegister src1, KRegister src2) {
2509   assert(VM_Version::supports_avx512bw(), "");
2510   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2511   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2512   emit_int16((unsigned char)0x99, (0xC0 | encode));
2513 }
2514 
2515 void Assembler::ktestd(KRegister src1, KRegister src2) {
2516   assert(VM_Version::supports_avx512bw(), "");
2517   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2518   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2519   emit_int16((unsigned char)0x99, (0xC0 | encode));
2520 }
2521 
2522 void Assembler::movb(Address dst, int imm8) {
2523   InstructionMark im(this);
2524   prefix(dst);
2525   emit_int8((unsigned char)0xC6);
2526   emit_operand(rax, dst, 1);
2527   emit_int8(imm8);
2528 }
2529 
2530 
2531 void Assembler::movb(Address dst, Register src) {
2532   assert(src->has_byte_register(), "must have byte register");
2533   InstructionMark im(this);
2534   prefix(dst, src, true);
2535   emit_int8((unsigned char)0x88);
2536   emit_operand(src, dst);
2537 }
2538 
2539 void Assembler::movdl(XMMRegister dst, Register src) {
2540   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2541   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2542   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2543   emit_int16(0x6E, (0xC0 | encode));
2544 }
2545 
2546 void Assembler::movdl(Register dst, XMMRegister src) {
2547   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2548   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2549   // swap src/dst to get correct prefix
2550   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2551   emit_int16(0x7E, (0xC0 | encode));
2552 }
2553 
2554 void Assembler::movdl(XMMRegister dst, Address src) {
2555   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2556   InstructionMark im(this);
2557   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2558   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2559   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2560   emit_int8(0x6E);
2561   emit_operand(dst, src);
2562 }
2563 
2564 void Assembler::movdl(Address dst, XMMRegister src) {
2565   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2566   InstructionMark im(this);
2567   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2568   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2569   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2570   emit_int8(0x7E);
2571   emit_operand(src, dst);
2572 }
2573 
2574 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2575   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2576   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2577   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2578   emit_int16(0x6F, (0xC0 | encode));
2579 }
2580 
2581 void Assembler::movdqa(XMMRegister dst, Address src) {
2582   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2583   InstructionMark im(this);
2584   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2585   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2586   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2587   emit_int8(0x6F);
2588   emit_operand(dst, src);
2589 }
2590 
2591 void Assembler::movdqu(XMMRegister dst, Address src) {
2592   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2593   InstructionMark im(this);
2594   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2595   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2596   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2597   emit_int8(0x6F);
2598   emit_operand(dst, src);
2599 }
2600 
2601 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2602   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2603   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2604   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2605   emit_int16(0x6F, (0xC0 | encode));
2606 }
2607 
2608 void Assembler::movdqu(Address dst, XMMRegister src) {
2609   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2610   InstructionMark im(this);
2611   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2612   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2613   attributes.reset_is_clear_context();
2614   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2615   emit_int8(0x7F);
2616   emit_operand(src, dst);
2617 }
2618 
2619 // Move Unaligned 256-bit Vector
2620 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2621   assert(UseAVX > 0, "");
2622   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2623   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2624   emit_int16(0x6F, (0xC0 | encode));
2625 }
2626 
2627 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2628   assert(UseAVX > 0, "");
2629   InstructionMark im(this);
2630   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2631   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2632   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2633   emit_int8(0x6F);
2634   emit_operand(dst, src);
2635 }
2636 
2637 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2638   assert(UseAVX > 0, "");
2639   InstructionMark im(this);
2640   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2641   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2642   attributes.reset_is_clear_context();
2643   // swap src<->dst for encoding
2644   assert(src != xnoreg, "sanity");
2645   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2646   emit_int8(0x7F);
2647   emit_operand(src, dst);
2648 }
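     // Illustrative use from a stub (hypothetical registers, assuming AVX is
     // available):
     //   __ vmovdqu(xmm0, Address(rsi, 0));   // 32-byte unaligned load
     //   __ vmovdqu(Address(rdi, 0), xmm0);   // 32-byte unaligned store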
2649 
2650 // Move Unaligned EVEX-enabled Vector (programmable element size: 8, 16, 32 or 64 bits)
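     // When merge is false, the default EVEX zeroing semantics (EVEX.z = 1)
     // apply and masked-off destination elements are cleared; when merge is
     // true, reset_is_clear_context() selects merge-masking (EVEX.z = 0) and
     // masked-off destination elements are left unchanged.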
2651 void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) {
2652   assert(VM_Version::supports_evex(), "");
2653   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2654   attributes.set_is_evex_instruction();
2655   if (merge) {
2656     attributes.reset_is_clear_context();
2657   }
2658   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2659   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2660   emit_int16(0x6F, (0xC0 | encode));
2661 }
2662 
2663 void Assembler::evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) {
2664   assert(VM_Version::supports_evex(), "");
2665   InstructionMark im(this);
2666   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2667   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2668   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2669   attributes.set_is_evex_instruction();
2670   if (merge) {
2671     attributes.reset_is_clear_context();
2672   }
2673   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2674   emit_int8(0x6F);
2675   emit_operand(dst, src);
2676 }
2677 
2678 void Assembler::evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) {
2679   assert(VM_Version::supports_evex(), "");
2680   assert(src != xnoreg, "sanity");
2681   InstructionMark im(this);
2682   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2683   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2684   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2685   attributes.set_is_evex_instruction();
2686   if (merge) {
2687     attributes.reset_is_clear_context();
2688   }
2689   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2690   emit_int8(0x7F);
2691   emit_operand(src, dst);
2692 }
2693 
2694 void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
2695   assert(VM_Version::supports_avx512vlbw(), "");
2696   InstructionMark im(this);
2697   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2698   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2699   attributes.set_embedded_opmask_register_specifier(mask);
2700   attributes.set_is_evex_instruction();
2701   if (merge) {
2702     attributes.reset_is_clear_context();
2703   }
2704   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2705   emit_int8(0x6F);
2706   emit_operand(dst, src);
2707 }
2708 
2709 void Assembler::evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) {
2710   assert(VM_Version::supports_evex(), "");
2711   InstructionMark im(this);
2712   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2713   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2714   attributes.set_is_evex_instruction();
2715   if (merge) {
2716     attributes.reset_is_clear_context();
2717   }
2718   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2719   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2720   emit_int8(0x6F);
2721   emit_operand(dst, src);
2722 }
2723 
2724 void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
2725   assert(VM_Version::supports_avx512vlbw(), "");
2726   InstructionMark im(this);
2727   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2728   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2729   attributes.set_embedded_opmask_register_specifier(mask);
2730   attributes.set_is_evex_instruction();
2731   if (merge) {
2732     attributes.reset_is_clear_context();
2733   }
2734   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2735   emit_int8(0x6F);
2736   emit_operand(dst, src);
2737 }
2738 
2739 void Assembler::evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) {
2740   assert(VM_Version::supports_evex(), "");
2741   assert(src != xnoreg, "sanity");
2742   InstructionMark im(this);
2743   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2744   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2745   attributes.set_is_evex_instruction();
2746   if (merge) {
2747     attributes.reset_is_clear_context();
2748   }
2749   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2750   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2751   emit_int8(0x7F);
2752   emit_operand(src, dst);
2753 }
2754 
2755 void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
2756   assert(VM_Version::supports_avx512vlbw(), "");
2757   assert(src != xnoreg, "sanity");
2758   InstructionMark im(this);
2759   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2760   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2761   attributes.set_embedded_opmask_register_specifier(mask);
2762   attributes.set_is_evex_instruction();
2763   if (merge) {
2764     attributes.reset_is_clear_context();
2765   }
2766   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2767   emit_int8(0x7F);
2768   emit_operand(src, dst);
2769 }
2770 
2771 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2772   // Unmasked instruction
2773   evmovdqul(dst, k0, src, /*merge*/ false, vector_len);
2774 }
2775 
2776 void Assembler::evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
2777   assert(VM_Version::supports_evex(), "");
2778   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2779   attributes.set_embedded_opmask_register_specifier(mask);
2780   attributes.set_is_evex_instruction();
2781   if (merge) {
2782     attributes.reset_is_clear_context();
2783   }
2784   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2785   emit_int16(0x6F, (0xC0 | encode));
2786 }
2787 
2788 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2789   // Unmasked instruction
2790   evmovdqul(dst, k0, src, /*merge*/ false, vector_len);
2791 }
2792 
2793 void Assembler::evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
2794   assert(VM_Version::supports_evex(), "");
2795   InstructionMark im(this);
2796   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2797   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2798   attributes.set_embedded_opmask_register_specifier(mask);
2799   attributes.set_is_evex_instruction();
2800   if (merge) {
2801     attributes.reset_is_clear_context();
2802   }
2803   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2804   emit_int8(0x6F);
2805   emit_operand(dst, src);
2806 }
2807 
2808 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2809   // Unmasked instruction
2810   evmovdqul(dst, k0, src, /*merge*/ true, vector_len);
2811 }
2812 
2813 void Assembler::evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
2814   assert(VM_Version::supports_evex(), "");
2815   assert(src != xnoreg, "sanity");
2816   InstructionMark im(this);
2817   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2818   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2819   attributes.set_embedded_opmask_register_specifier(mask);
2820   attributes.set_is_evex_instruction();
2821   if (merge) {
2822     attributes.reset_is_clear_context();
2823   }
2824   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2825   emit_int8(0x7F);
2826   emit_operand(src, dst);
2827 }
2828 
2829 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2830   // Unmasked instruction
2831   if (dst->encoding() == src->encoding()) return;
2832   evmovdquq(dst, k0, src, /*merge*/ false, vector_len);
2833 }
2834 
2835 void Assembler::evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
2836   assert(VM_Version::supports_evex(), "");
2837   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2838   attributes.set_embedded_opmask_register_specifier(mask);
2839   attributes.set_is_evex_instruction();
2840   if (merge) {
2841     attributes.reset_is_clear_context();
2842   }
2843   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2844   emit_int16(0x6F, (0xC0 | encode));
2845 }
2846 
2847 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2848   // Unmasked instruction
2849   evmovdquq(dst, k0, src, /*merge*/ false, vector_len);
2850 }
2851 
2852 void Assembler::evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
2853   assert(VM_Version::supports_evex(), "");
2854   InstructionMark im(this);
2855   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2856   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2857   attributes.set_embedded_opmask_register_specifier(mask);
2858   attributes.set_is_evex_instruction();
2859   if (merge) {
2860     attributes.reset_is_clear_context();
2861   }
2862   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2863   emit_int8(0x6F);
2864   emit_operand(dst, src);
2865 }
2866 
2867 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2868   // Unmasked instruction
2869   evmovdquq(dst, k0, src, /*merge*/ true, vector_len);
2870 }
2871 
2872 void Assembler::evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
2873   assert(VM_Version::supports_evex(), "");
2874   assert(src != xnoreg, "sanity");
2875   InstructionMark im(this);
2876   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2877   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2878   attributes.set_embedded_opmask_register_specifier(mask);
2879   if (merge) {
2880     attributes.reset_is_clear_context();
2881   }
2882   attributes.set_is_evex_instruction();
2883   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2884   emit_int8(0x7F);
2885   emit_operand(src, dst);
2886 }
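     // Illustrative masked copy (hypothetical registers, assuming AVX-512):
     //   __ evmovdquq(xmm1, k1, Address(rsi, 0), /*merge*/ false, Assembler::AVX_512bit);
     //   __ evmovdquq(Address(rdi, 0), k1, xmm1, /*merge*/ true,  Assembler::AVX_512bit);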
2887 
2888 // The 32-bit moves below use zero extension to 64 bits on 64-bit platforms.
2889 
2890 void Assembler::movl(Register dst, int32_t imm32) {
2891   int encode = prefix_and_encode(dst->encoding());
2892   emit_int8(0xB8 | encode);
2893   emit_int32(imm32);
2894 }
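     // For example, movl(rax, 1) encodes as 0xB8 0x01 0x00 0x00 0x00 and, in
     // 64-bit mode, also clears the upper 32 bits of rax.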
2895 
2896 void Assembler::movl(Register dst, Register src) {
2897   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2898   emit_int16((unsigned char)0x8B, (0xC0 | encode));
2899 }
2900 
2901 void Assembler::movl(Register dst, Address src) {
2902   InstructionMark im(this);
2903   prefix(src, dst);
2904   emit_int8((unsigned char)0x8B);
2905   emit_operand(dst, src);
2906 }
2907 
2908 void Assembler::movl(Address dst, int32_t imm32) {
2909   InstructionMark im(this);
2910   prefix(dst);
2911   emit_int8((unsigned char)0xC7);
2912   emit_operand(rax, dst, 4);
2913   emit_int32(imm32);
2914 }
2915 
2916 void Assembler::movl(Address dst, Register src) {
2917   InstructionMark im(this);
2918   prefix(dst, src);
2919   emit_int8((unsigned char)0x89);
2920   emit_operand(src, dst);
2921 }
2922 
2923 // Newer CPUs require the use of movsd and movss to avoid a partial register stall
2924 // when loading from memory. But for old Opterons, use movlpd instead of movsd.
2925 // The selection is done in MacroAssembler::movdbl() and movflt().
2926 void Assembler::movlpd(XMMRegister dst, Address src) {
2927   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2928   InstructionMark im(this);
2929   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2930   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2931   attributes.set_rex_vex_w_reverted();
2932   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2933   emit_int8(0x12);
2934   emit_operand(dst, src);
2935 }
2936 
2937 void Assembler::movq(XMMRegister dst, Address src) {
2938   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2939   InstructionMark im(this);
2940   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2941   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2942   attributes.set_rex_vex_w_reverted();
2943   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2944   emit_int8(0x7E);
2945   emit_operand(dst, src);
2946 }
2947 
2948 void Assembler::movq(Address dst, XMMRegister src) {
2949   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2950   InstructionMark im(this);
2951   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2952   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2953   attributes.set_rex_vex_w_reverted();
2954   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2955   emit_int8((unsigned char)0xD6);
2956   emit_operand(src, dst);
2957 }
2958 
2959 void Assembler::movq(XMMRegister dst, XMMRegister src) {
2960   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2961   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2962   attributes.set_rex_vex_w_reverted();
2963   int encode = simd_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2964   emit_int16((unsigned char)0xD6, (0xC0 | encode));
2965 }
2966 
2967 void Assembler::movq(Register dst, XMMRegister src) {
2968   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2969   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2970   // swap src/dst to get correct prefix
2971   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2972   emit_int16(0x7E, (0xC0 | encode));
2973 }
2974 
2975 void Assembler::movq(XMMRegister dst, Register src) {
2976   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2977   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2978   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2979   emit_int16(0x6E, (0xC0 | encode));
2980 }
2981 
2982 void Assembler::movsbl(Register dst, Address src) { // movsxb
2983   InstructionMark im(this);
2984   prefix(src, dst);
2985   emit_int16(0x0F, (unsigned char)0xBE);
2986   emit_operand(dst, src);
2987 }
2988 
2989 void Assembler::movsbl(Register dst, Register src) { // movsxb
2990   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2991   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2992   emit_int24(0x0F, (unsigned char)0xBE, (0xC0 | encode));
2993 }
2994 
2995 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2996   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2997   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2998   attributes.set_rex_vex_w_reverted();
2999   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3000   emit_int16(0x10, (0xC0 | encode));
3001 }
3002 
3003 void Assembler::movsd(XMMRegister dst, Address src) {
3004   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3005   InstructionMark im(this);
3006   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3007   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3008   attributes.set_rex_vex_w_reverted();
3009   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3010   emit_int8(0x10);
3011   emit_operand(dst, src);
3012 }
3013 
3014 void Assembler::movsd(Address dst, XMMRegister src) {
3015   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3016   InstructionMark im(this);
3017   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3018   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3019   attributes.reset_is_clear_context();
3020   attributes.set_rex_vex_w_reverted();
3021   simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3022   emit_int8(0x11);
3023   emit_operand(src, dst);
3024 }
3025 
3026 void Assembler::movss(XMMRegister dst, XMMRegister src) {
3027   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3028   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3029   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
3030   emit_int16(0x10, (0xC0 | encode));
3031 }
3032 
3033 void Assembler::movss(XMMRegister dst, Address src) {
3034   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3035   InstructionMark im(this);
3036   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3037   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3038   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
3039   emit_int8(0x10);
3040   emit_operand(dst, src);
3041 }
3042 
3043 void Assembler::movss(Address dst, XMMRegister src) {
3044   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3045   InstructionMark im(this);
3046   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3047   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3048   attributes.reset_is_clear_context();
3049   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
3050   emit_int8(0x11);
3051   emit_operand(src, dst);
3052 }
3053 
3054 void Assembler::movswl(Register dst, Address src) { // movsxw
3055   InstructionMark im(this);
3056   prefix(src, dst);
3057   emit_int16(0x0F, (unsigned char)0xBF);
3058   emit_operand(dst, src);
3059 }
3060 
3061 void Assembler::movswl(Register dst, Register src) { // movsxw
3062   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3063   emit_int24(0x0F, (unsigned char)0xBF, (0xC0 | encode));
3064 }
3065 
3066 void Assembler::movw(Address dst, int imm16) {
3067   InstructionMark im(this);
3068 
3069   emit_int8(0x66); // switch to 16-bit mode
3070   prefix(dst);
3071   emit_int8((unsigned char)0xC7);
3072   emit_operand(rax, dst, 2);
3073   emit_int16(imm16);
3074 }
3075 
3076 void Assembler::movw(Register dst, Address src) {
3077   InstructionMark im(this);
3078   emit_int8(0x66);
3079   prefix(src, dst);
3080   emit_int8((unsigned char)0x8B);
3081   emit_operand(dst, src);
3082 }
3083 
3084 void Assembler::movw(Address dst, Register src) {
3085   InstructionMark im(this);
3086   emit_int8(0x66);
3087   prefix(dst, src);
3088   emit_int8((unsigned char)0x89);
3089   emit_operand(src, dst);
3090 }
3091 
3092 void Assembler::movzbl(Register dst, Address src) { // movzxb
3093   InstructionMark im(this);
3094   prefix(src, dst);
3095   emit_int16(0x0F, (unsigned char)0xB6);
3096   emit_operand(dst, src);
3097 }
3098 
3099 void Assembler::movzbl(Register dst, Register src) { // movzxb
3100   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
3101   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
3102   emit_int24(0x0F, (unsigned char)0xB6, 0xC0 | encode);
3103 }
3104 
3105 void Assembler::movzwl(Register dst, Address src) { // movzxw
3106   InstructionMark im(this);
3107   prefix(src, dst);
3108   emit_int16(0x0F, (unsigned char)0xB7);
3109   emit_operand(dst, src);
3110 }
3111 
3112 void Assembler::movzwl(Register dst, Register src) { // movzxw
3113   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3114   emit_int24(0x0F, (unsigned char)0xB7, 0xC0 | encode);
3115 }
3116 
3117 void Assembler::mull(Address src) {
3118   InstructionMark im(this);
3119   prefix(src);
3120   emit_int8((unsigned char)0xF7);
3121   emit_operand(rsp, src);
3122 }
3123 
3124 void Assembler::mull(Register src) {
3125   int encode = prefix_and_encode(src->encoding());
3126   emit_int16((unsigned char)0xF7, (0xE0 | encode));
3127 }
3128 
3129 void Assembler::mulsd(XMMRegister dst, Address src) {
3130   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3131   InstructionMark im(this);
3132   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3133   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3134   attributes.set_rex_vex_w_reverted();
3135   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3136   emit_int8(0x59);
3137   emit_operand(dst, src);
3138 }
3139 
3140 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
3141   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3142   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3143   attributes.set_rex_vex_w_reverted();
3144   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
3145   emit_int16(0x59, (0xC0 | encode));
3146 }
3147 
3148 void Assembler::mulss(XMMRegister dst, Address src) {
3149   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3150   InstructionMark im(this);
3151   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3152   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3153   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
3154   emit_int8(0x59);
3155   emit_operand(dst, src);
3156 }
3157 
3158 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
3159   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3160   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3161   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
3162   emit_int16(0x59, (0xC0 | encode));
3163 }
3164 
3165 void Assembler::negl(Register dst) {
3166   int encode = prefix_and_encode(dst->encoding());
3167   emit_int16((unsigned char)0xF7, (0xD8 | encode));
3168 }
3169 
3170 void Assembler::nop(int i) {
3171 #ifdef ASSERT
3172   assert(i > 0, " ");
3173   // The fancy nops aren't currently recognized by debuggers, making it a
3174   // pain to disassemble code while debugging. If asserts are on, speed is
3175   // clearly not an issue, so simply use the traditional single-byte nop
3176   // for alignment.
3177 
3178   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
3179   return;
3180 
3181 #endif // ASSERT
3182 
3183   if (UseAddressNop && VM_Version::is_intel()) {
3184     //
3185     // Using multi-byte nops "0x0F 0x1F [address]" for Intel
3186     //  1: 0x90
3187     //  2: 0x66 0x90
3188     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3189     //  4: 0x0F 0x1F 0x40 0x00
3190     //  5: 0x0F 0x1F 0x44 0x00 0x00
3191     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3192     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3193     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3194     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3195     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3196     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3197 
3198     // The rest of the encoding is Intel-specific - don't use consecutive address nops
3199 
3200     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3201     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3202     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3203     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3204 
3205     while (i >= 15) {
3206       // For Intel don't generate consecutive address nops (mix with regular nops)
3207       i -= 15;
3208       emit_int24(0x66, 0x66, 0x66);
3209       addr_nop_8();
3210       emit_int32(0x66, 0x66, 0x66, (unsigned char)0x90);
3211     }
3212     switch (i) {
3213       case 14:
3214         emit_int8(0x66); // size prefix
3215       case 13:
3216         emit_int8(0x66); // size prefix
3217       case 12:
3218         addr_nop_8();
3219         emit_int32(0x66, 0x66, 0x66, (unsigned char)0x90);
3220         break;
3221       case 11:
3222         emit_int8(0x66); // size prefix
3223       case 10:
3224         emit_int8(0x66); // size prefix
3225       case 9:
3226         emit_int8(0x66); // size prefix
3227       case 8:
3228         addr_nop_8();
3229         break;
3230       case 7:
3231         addr_nop_7();
3232         break;
3233       case 6:
3234         emit_int8(0x66); // size prefix
3235       case 5:
3236         addr_nop_5();
3237         break;
3238       case 4:
3239         addr_nop_4();
3240         break;
3241       case 3:
3242         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3243         emit_int8(0x66); // size prefix
3244       case 2:
3245         emit_int8(0x66); // size prefix
3246       case 1:
3247         emit_int8((unsigned char)0x90);
3248                          // nop
3249         break;
3250       default:
3251         assert(i == 0, " ");
3252     }
3253     return;
3254   }
3255   if (UseAddressNop && VM_Version::is_amd_family()) {
3256     //
3257     // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
3258     //  1: 0x90
3259     //  2: 0x66 0x90
3260     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3261     //  4: 0x0F 0x1F 0x40 0x00
3262     //  5: 0x0F 0x1F 0x44 0x00 0x00
3263     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3264     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3265     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3266     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3267     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3268     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3269 
3270     // The rest of the encoding is AMD-specific - use consecutive address nops
3271 
3272     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3273     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3274     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3275     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3276     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3277     //     Size prefixes (0x66) are added for larger sizes
3278 
3279     while (i >= 22) {
3280       i -= 11;
3281       emit_int24(0x66, 0x66, 0x66);
3282       addr_nop_8();
3283     }
3284     // Generate the first nop for sizes from 21 down to 12
3285     switch (i) {
3286       case 21:
3287         i -= 1;
3288         emit_int8(0x66); // size prefix
3289       case 20:
3290       case 19:
3291         i -= 1;
3292         emit_int8(0x66); // size prefix
3293       case 18:
3294       case 17:
3295         i -= 1;
3296         emit_int8(0x66); // size prefix
3297       case 16:
3298       case 15:
3299         i -= 8;
3300         addr_nop_8();
3301         break;
3302       case 14:
3303       case 13:
3304         i -= 7;
3305         addr_nop_7();
3306         break;
3307       case 12:
3308         i -= 6;
3309         emit_int8(0x66); // size prefix
3310         addr_nop_5();
3311         break;
3312       default:
3313         assert(i < 12, " ");
3314     }
3315 
3316     // Generate the second nop for sizes from 11 down to 1
3317     switch (i) {
3318       case 11:
3319         emit_int8(0x66); // size prefix
3320       case 10:
3321         emit_int8(0x66); // size prefix
3322       case 9:
3323         emit_int8(0x66); // size prefix
3324       case 8:
3325         addr_nop_8();
3326         break;
3327       case 7:
3328         addr_nop_7();
3329         break;
3330       case 6:
3331         emit_int8(0x66); // size prefix
3332       case 5:
3333         addr_nop_5();
3334         break;
3335       case 4:
3336         addr_nop_4();
3337         break;
3338       case 3:
3339         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3340         emit_int8(0x66); // size prefix
3341       case 2:
3342         emit_int8(0x66); // size prefix
3343       case 1:
3344         emit_int8((unsigned char)0x90);
3345                          // nop
3346         break;
3347       default:
3348         assert(i == 0, " ");
3349     }
3350     return;
3351   }
3352 
3353   if (UseAddressNop && VM_Version::is_zx()) {
3354     //
3355     // Using multi-byte nops "0x0F 0x1F [address]" for ZX
3356     //  1: 0x90
3357     //  2: 0x66 0x90
3358     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3359     //  4: 0x0F 0x1F 0x40 0x00
3360     //  5: 0x0F 0x1F 0x44 0x00 0x00
3361     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3362     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3363     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3364     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3365     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3366     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3367 
3368     // The rest of the encoding is ZX-specific - don't use consecutive address nops
3369 
3370     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3371     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3372     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3373     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3374 
3375     while (i >= 15) {
3376       // For ZX don't generate consecutive address nops (mix with regular nops)
3377       i -= 15;
3378       emit_int24(0x66, 0x66, 0x66);
3379       addr_nop_8();
3380       emit_int32(0x66, 0x66, 0x66, (unsigned char)0x90);
3381     }
3382     switch (i) {
3383       case 14:
3384         emit_int8(0x66); // size prefix
3385       case 13:
3386         emit_int8(0x66); // size prefix
3387       case 12:
3388         addr_nop_8();
3389         emit_int32(0x66, 0x66, 0x66, (unsigned char)0x90);
3390         break;
3391       case 11:
3392         emit_int8(0x66); // size prefix
3393       case 10:
3394         emit_int8(0x66); // size prefix
3395       case 9:
3396         emit_int8(0x66); // size prefix
3397       case 8:
3398         addr_nop_8();
3399         break;
3400       case 7:
3401         addr_nop_7();
3402         break;
3403       case 6:
3404         emit_int8(0x66); // size prefix
3405       case 5:
3406         addr_nop_5();
3407         break;
3408       case 4:
3409         addr_nop_4();
3410         break;
3411       case 3:
3412         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3413         emit_int8(0x66); // size prefix
3414       case 2:
3415         emit_int8(0x66); // size prefix
3416       case 1:
3417         emit_int8((unsigned char)0x90);
3418                          // nop
3419         break;
3420       default:
3421         assert(i == 0, " ");
3422     }
3423     return;
3424   }
3425 
3426   // Using nops with size prefixes "0x66 0x90".
3427   // From AMD Optimization Guide:
3428   //  1: 0x90
3429   //  2: 0x66 0x90
3430   //  3: 0x66 0x66 0x90
3431   //  4: 0x66 0x66 0x66 0x90
3432   //  5: 0x66 0x66 0x90 0x66 0x90
3433   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
3434   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
3435   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
3436   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3437   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3438   //
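       // For example, nop(7) emits 0x66 0x66 0x66 0x90 0x66 0x66 0x90 on this
       // fallback path, and a single 7-byte address nop (0x0F 0x1F 0x80 0x00
       // 0x00 0x00 0x00) on the UseAddressNop paths above.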
3439   while (i > 12) {
3440     i -= 4;
3441     emit_int32(0x66, 0x66, 0x66, (unsigned char)0x90);
3442   }
3443   // 1 - 12 nops
3444   if (i > 8) {
3445     if (i > 9) {
3446       i -= 1;
3447       emit_int8(0x66);
3448     }
3449     i -= 3;
3450     emit_int24(0x66, 0x66, (unsigned char)0x90);
3451   }
3452   // 1 - 8 nops
3453   if (i > 4) {
3454     if (i > 6) {
3455       i -= 1;
3456       emit_int8(0x66);
3457     }
3458     i -= 3;
3459     emit_int24(0x66, 0x66, (unsigned char)0x90);
3460   }
3461   switch (i) {
3462     case 4:
3463       emit_int8(0x66);
3464     case 3:
3465       emit_int8(0x66);
3466     case 2:
3467       emit_int8(0x66);
3468     case 1:
3469       emit_int8((unsigned char)0x90);
3470       break;
3471     default:
3472       assert(i == 0, " ");
3473   }
3474 }
3475 
3476 void Assembler::notl(Register dst) {
3477   int encode = prefix_and_encode(dst->encoding());
3478   emit_int16((unsigned char)0xF7, (0xD0 | encode));
3479 }
3480 
3481 void Assembler::orw(Register dst, Register src) {
3482   (void)prefix_and_encode(dst->encoding(), src->encoding());
3483   emit_arith(0x0B, 0xC0, dst, src);
3484 }
3485 
3486 void Assembler::orl(Address dst, int32_t imm32) {
3487   InstructionMark im(this);
3488   prefix(dst);
3489   emit_arith_operand(0x81, rcx, dst, imm32);
3490 }
3491 
3492 void Assembler::orl(Register dst, int32_t imm32) {
3493   prefix(dst);
3494   emit_arith(0x81, 0xC8, dst, imm32);
3495 }
3496 
3497 void Assembler::orl(Register dst, Address src) {
3498   InstructionMark im(this);
3499   prefix(src, dst);
3500   emit_int8(0x0B);
3501   emit_operand(dst, src);
3502 }
3503 
3504 void Assembler::orl(Register dst, Register src) {
3505   (void) prefix_and_encode(dst->encoding(), src->encoding());
3506   emit_arith(0x0B, 0xC0, dst, src);
3507 }
3508 
3509 void Assembler::orl(Address dst, Register src) {
3510   InstructionMark im(this);
3511   prefix(dst, src);
3512   emit_int8(0x09);
3513   emit_operand(src, dst);
3514 }
3515 
3516 void Assembler::orb(Address dst, int imm8) {
3517   InstructionMark im(this);
3518   prefix(dst);
3519   emit_int8((unsigned char)0x80);
3520   emit_operand(rcx, dst, 1);
3521   emit_int8(imm8);
3522 }
3523 
3524 void Assembler::packsswb(XMMRegister dst, XMMRegister src) {
3525   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3526   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3527   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3528   emit_int16(0x63, (0xC0 | encode));
3529 }
3530 
3531 void Assembler::vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3532   assert(UseAVX > 0, "some form of AVX must be enabled");
3533   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3534   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3535   emit_int16(0x63, (0xC0 | encode));
3536 }
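     // These pack instructions narrow with saturation: packsswb/vpacksswb clamp
     // 16-bit elements to the signed byte range (e.g. 300 -> 127, -200 -> -128),
     // while packuswb/vpackuswb below clamp to the unsigned byte range
     // (e.g. 300 -> 255, -200 -> 0).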
3537 
3538 void Assembler::packssdw(XMMRegister dst, XMMRegister src) {
3539   assert(VM_Version::supports_sse2(), "");
3540   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3541   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3542   emit_int16(0x6B, (0xC0 | encode));
3543 }
3544 
3545 void Assembler::vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3546   assert(UseAVX > 0, "some form of AVX must be enabled");
3547   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3548   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3549   emit_int16(0x6B, (0xC0 | encode));
3550 }
3551 
3552 void Assembler::packuswb(XMMRegister dst, Address src) {
3553   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3554   assert((UseAVX > 0), "SSE mode requires 16-byte address alignment");
3555   InstructionMark im(this);
3556   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3557   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3558   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3559   emit_int8(0x67);
3560   emit_operand(dst, src);
3561 }
3562 
3563 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
3564   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3565   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3566   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3567   emit_int16(0x67, (0xC0 | encode));
3568 }
3569 
3570 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3571   assert(UseAVX > 0, "some form of AVX must be enabled");
3572   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3573   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3574   emit_int16(0x67, (0xC0 | encode));
3575 }
3576 
3577 void Assembler::packusdw(XMMRegister dst, XMMRegister src) {
3578   assert(VM_Version::supports_sse4_1(), "");
3579   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3580   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3581   emit_int16(0x2B, (0xC0 | encode));
3582 }
3583 
3584 void Assembler::vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3585   assert(UseAVX > 0, "some form of AVX must be enabled");
3586   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3587   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3588   emit_int16(0x2B, (0xC0 | encode));
3589 }
3590 
3591 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3592   assert(VM_Version::supports_avx2(), "");
3593   assert(vector_len != AVX_128bit, "");
3594   // VEX.256.66.0F3A.W1 00 /r ib
3595   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3596   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3597   emit_int24(0x00, (0xC0 | encode), imm8);
3598 }
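     // imm8 selects the source qwords two bits at a time; e.g. imm8 == 0x4E
     // (0b01001110) swaps the two 128-bit halves of a 256-bit vector.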
3599 
3600 void Assembler::vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3601   assert(vector_len == AVX_256bit ? VM_Version::supports_avx512vl() :
3602          vector_len == AVX_512bit ? VM_Version::supports_evex()     : false, "not supported");
3603   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3604   attributes.set_is_evex_instruction();
3605   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3606   emit_int16(0x36, (0xC0 | encode));
3607 }
3608 
3609 void Assembler::vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3610   assert(VM_Version::supports_avx512_vbmi(), "");
3611   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3612   attributes.set_is_evex_instruction();
3613   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3614   emit_int16((unsigned char)0x8D, (0xC0 | encode));
3615 }
3616 
3617 void Assembler::vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3618   assert(vector_len == AVX_128bit ? VM_Version::supports_avx512vlbw() :
3619          vector_len == AVX_256bit ? VM_Version::supports_avx512vlbw() :
3620          vector_len == AVX_512bit ? VM_Version::supports_avx512bw()   : false, "not supported");
3621   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3622   attributes.set_is_evex_instruction();
3623   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3624   emit_int16((unsigned char)0x8D, (0xC0 | encode));
3625 }
3626 
3627 void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3628   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
3629   // VEX.NDS.256.66.0F38.W0 36 /r
3630   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3631   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3632   emit_int16(0x36, (0xC0 | encode));
3633 }
3634 
3635 void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3636   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
3637   // VEX.NDS.256.66.0F38.W0 36 /r
3638   InstructionMark im(this);
3639   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3640   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3641   emit_int8(0x36);
3642   emit_operand(dst, src);
3643 }
3644 
3645 void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3646   assert(VM_Version::supports_avx2(), "");
3647   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3648   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3649   emit_int24(0x46, (0xC0 | encode), imm8);
3650 }
3651 
3652 void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3653   assert(VM_Version::supports_avx(), "");
3654   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3655   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3656   emit_int24(0x06, (0xC0 | encode), imm8);
3657 }
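     // For vperm2f128/vperm2i128, imm8 picks each 128-bit half of dst from the
     // four halves of {nds, src}; e.g. imm8 == 0x31 places nds's high half in
     // dst's low half and src's high half in dst's high half.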
3658 
3659 void Assembler::vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3660   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
3661   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3662   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3663   emit_int24(0x04, (0xC0 | encode), imm8);
3664 }
3665 
3666 void Assembler::vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3667   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
3668   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(),/* legacy_mode */ false,/* no_mask_reg */ true, /* uses_vl */ false);
3669   attributes.set_rex_vex_w_reverted();
3670   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3671   emit_int24(0x05, (0xC0 | encode), imm8);
3672 }
3673 
3674 void Assembler::vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3675   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
3676   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */false, /* no_mask_reg */ true, /* uses_vl */ false);
3677   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3678   emit_int24(0x01, (0xC0 | encode), imm8);
3679 }
3680 
3681 void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3682   assert(VM_Version::supports_evex(), "");
3683   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3684   attributes.set_is_evex_instruction();
3685   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3686   emit_int16(0x76, (0xC0 | encode));
3687 }
3688 
3689 void Assembler::pause() {
3690   emit_int16((unsigned char)0xF3, (unsigned char)0x90);
3691 }
3692 
3693 void Assembler::ud2() {
3694   emit_int16(0x0F, 0x0B);
3695 }
3696 
3697 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3698   assert(VM_Version::supports_sse4_2(), "");
3699   InstructionMark im(this);
3700   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3701   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3702   emit_int8(0x61);
3703   emit_operand(dst, src);
3704   emit_int8(imm8);
3705 }
3706 
3707 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3708   assert(VM_Version::supports_sse4_2(), "");
3709   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3710   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3711   emit_int24(0x61, (0xC0 | encode), imm8);
3712 }
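     // pcmpestri compares the explicit-length strings in dst and src (lengths
     // in rax and rdx) under the control of imm8 and leaves the match index in
     // rcx, setting the arithmetic flags as a side effect.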
3713 
3714 // In this context, equal components are set to all ones in the dst vector; non-equal components are zeroed in dst
3715 void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
3716   assert(VM_Version::supports_sse2(), "");
3717   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3718   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3719   emit_int16(0x74, (0xC0 | encode));
3720 }
3721 
3722 void Assembler::vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
3723   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
3724   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
3725   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3726   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3727   emit_int16(cond_encoding, (0xC0 | encode));
3728 }
3729 
3730 // In this context, equal components are set to all ones in the dst vector; non-equal components are zeroed in dst
3731 void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3732   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
3733   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
3734   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3735   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3736   emit_int16(0x74, (0xC0 | encode));
3737 }
3738 
3739 // In this context, kdst receives the mask identifying the equal components
3740 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3741   assert(VM_Version::supports_avx512bw(), "");
3742   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3743   attributes.set_is_evex_instruction();
3744   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3745   emit_int16(0x74, (0xC0 | encode));
3746 }
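     // Illustrative use (hypothetical registers, assuming AVX-512BW): compare
     // 64 bytes and reuse the resulting mask for a merging load:
     //   __ evpcmpeqb(k2, xmm0, xmm1, Assembler::AVX_512bit);
     //   __ evmovdqub(xmm2, k2, Address(rsi, 0), /*merge*/ true, Assembler::AVX_512bit);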
3747 
3748 void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3749   assert(VM_Version::supports_avx512vlbw(), "");
3750   InstructionMark im(this);
3751   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3752   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3753   attributes.set_is_evex_instruction();
3754   int dst_enc = kdst->encoding();
3755   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3756   emit_int8(0x64);
3757   emit_operand(as_Register(dst_enc), src);
3758 }
3759 
3760 void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3761   assert(VM_Version::supports_avx512vlbw(), "");
3762   InstructionMark im(this);
3763   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3764   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3765   attributes.reset_is_clear_context();
3766   attributes.set_embedded_opmask_register_specifier(mask);
3767   attributes.set_is_evex_instruction();
3768   int dst_enc = kdst->encoding();
3769   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3770   emit_int8(0x64);
3771   emit_operand(as_Register(dst_enc), src);
3772 }
3773 
3774 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3775   assert(VM_Version::supports_avx512vlbw(), "");
3776   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3777   attributes.set_is_evex_instruction();
3778   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3779   emit_int24(0x3E, (0xC0 | encode), vcc);
3780 }
3781 
3782 void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3783   assert(VM_Version::supports_avx512vlbw(), "");
3784   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3785   attributes.reset_is_clear_context();
3786   attributes.set_embedded_opmask_register_specifier(mask);
3787   attributes.set_is_evex_instruction();
3788   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3789   emit_int24(0x3E, (0xC0 | encode), vcc);
3790 }
3791 
3792 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
3793   assert(VM_Version::supports_avx512vlbw(), "");
3794   InstructionMark im(this);
3795   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3796   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3797   attributes.set_is_evex_instruction();
3798   int dst_enc = kdst->encoding();
3799   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3800   emit_int8(0x3E);
3801   emit_operand(as_Register(dst_enc), src);
3802   emit_int8(vcc);
3803 }
3804 
3805 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3806   assert(VM_Version::supports_avx512bw(), "");
3807   InstructionMark im(this);
3808   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3809   attributes.set_is_evex_instruction();
3810   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3811   int dst_enc = kdst->encoding();
3812   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3813   emit_int8(0x74);
3814   emit_operand(as_Register(dst_enc), src);
3815 }
3816 
3817 void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3818   assert(VM_Version::supports_avx512vlbw(), "");
3819   InstructionMark im(this);
3820   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3821   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3822   attributes.reset_is_clear_context();
3823   attributes.set_embedded_opmask_register_specifier(mask);
3824   attributes.set_is_evex_instruction();
3825   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3826   emit_int8(0x74);
3827   emit_operand(as_Register(kdst->encoding()), src);
3828 }
3829 
3830 // In this context, equal components are set to all ones in the dst vector; non-equal components are zeroed in dst
3831 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
3832   assert(VM_Version::supports_sse2(), "");
3833   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3834   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3835   emit_int16(0x75, (0xC0 | encode));
3836 }
3837 
3838 // In this context, components of dst that compare equal are set to all ones; non-equal components are zeroed
3839 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3840   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
3841   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
3842   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3843   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3844   emit_int16(0x75, (0xC0 | encode));
3845 }
3846 
3847 // In this context, kdst receives one mask bit per component, set when the compared components are equal
3848 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3849   assert(VM_Version::supports_avx512bw(), "");
3850   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3851   attributes.set_is_evex_instruction();
3852   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3853   emit_int16(0x75, (0xC0 | encode));
3854 }
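
// A minimal usage sketch (hypothetical register choices): after
//   evpcmpeqw(k1, xmm0, xmm1, Assembler::AVX_512bit);
// bit i of k1 is set iff word i of zmm0 equals word i of zmm1, so k1 can
// feed a subsequent masked operation or a kmov/test based branch.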
3855 
3856 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3857   assert(VM_Version::supports_avx512bw(), "");
3858   InstructionMark im(this);
3859   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3860   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3861   attributes.set_is_evex_instruction();
3862   int dst_enc = kdst->encoding();
3863   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3864   emit_int8(0x75);
3865   emit_operand(as_Register(dst_enc), src);
3866 }
3867 
3868 // In this context, components of dst that compare equal are set to all ones; non-equal components are zeroed
3869 void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
3870   assert(VM_Version::supports_sse2(), "");
3871   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3872   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3873   emit_int16(0x76, (0xC0 | encode));
3874 }
3875 
3876 // In this context, components of dst that compare equal are set to all ones; non-equal components are zeroed
3877 void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3878   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
3879   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
3880   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3881   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3882   emit_int16(0x76, (0xC0 | encode));
3883 }
3884 
3885 // In this context, kdst receives one mask bit per component, set when the compared components are equal
3886 void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len) {
3887   assert(VM_Version::supports_evex(), "");
3888   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3889   attributes.set_is_evex_instruction();
3890   attributes.reset_is_clear_context();
3891   attributes.set_embedded_opmask_register_specifier(mask);
3892   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3893   emit_int16(0x76, (0xC0 | encode));
3894 }
3895 
3896 void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3897   assert(VM_Version::supports_evex(), "");
3898   InstructionMark im(this);
3899   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3900   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3901   attributes.set_is_evex_instruction();
3902   attributes.reset_is_clear_context();
3903   attributes.set_embedded_opmask_register_specifier(mask);
3904   int dst_enc = kdst->encoding();
3905   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3906   emit_int8(0x76);
3907   emit_operand(as_Register(dst_enc), src);
3908 }
3909 
3910 // In this context, components of dst that compare equal are set to all ones; non-equal components are zeroed
3911 void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
3912   assert(VM_Version::supports_sse4_1(), "");
3913   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3914   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3915   emit_int16(0x29, (0xC0 | encode));
3916 }
3917 
3918 void Assembler::vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
3919   assert(VM_Version::supports_avx(), "");
3920   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3921   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3922   emit_int16(cond_encoding, (0xC0 | encode));
3923 }
3924 
3925 // In this context, components of dst that compare equal are set to all ones; non-equal components are zeroed
3926 void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3927   assert(VM_Version::supports_avx(), "");
3928   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3929   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3930   emit_int16(0x29, (0xC0 | encode));
3931 }
3932 
3933 // In this context, kdst receives one mask bit per component, set when the compared components are equal
3934 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3935   assert(VM_Version::supports_evex(), "");
3936   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3937   attributes.reset_is_clear_context();
3938   attributes.set_is_evex_instruction();
3939   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3940   emit_int16(0x29, (0xC0 | encode));
3941 }
3942 
3943 // In this context, kdst receives one mask bit per component, set when the compared components are equal
3944 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3945   assert(VM_Version::supports_evex(), "");
3946   InstructionMark im(this);
3947   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3948   attributes.reset_is_clear_context();
3949   attributes.set_is_evex_instruction();
3950   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
3951   int dst_enc = kdst->encoding();
3952   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3953   emit_int8(0x29);
3954   emit_operand(as_Register(dst_enc), src);
3955 }
3956 
3957 void Assembler::evpmovd2m(KRegister kdst, XMMRegister src, int vector_len) {
3958   assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
3959   assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
3960   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3961   attributes.set_is_evex_instruction();
3962   int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3963   emit_int16(0x39, (0xC0 | encode));
3964 }
3965 
3966 void Assembler::evpmovq2m(KRegister kdst, XMMRegister src, int vector_len) {
3967   assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
3968   assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
3969   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3970   attributes.set_is_evex_instruction();
3971   int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3972   emit_int16(0x39, (0xC0 | encode));
3973 }
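
// VPMOVD2M/VPMOVQ2M set bit i of kdst to the sign (most significant) bit of
// element i of src. Usage sketch (hypothetical registers):
//   evpmovd2m(k2, xmm0, Assembler::AVX_512bit);  // k2 = 16-bit mask of negative dwords in zmm0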
3974 
3975 void Assembler::pcmpgtq(XMMRegister dst, XMMRegister src) {
3976   assert(VM_Version::supports_sse4_1(), "");
3977   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3978   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3979   emit_int16(0x37, (0xC0 | encode));
3980 }
3981 
3982 void Assembler::pmovmskb(Register dst, XMMRegister src) {
3983   assert(VM_Version::supports_sse2(), "");
3984   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3985   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3986   emit_int16((unsigned char)0xD7, (0xC0 | encode));
3987 }
3988 
3989 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
3990   assert(VM_Version::supports_avx2(), "");
3991   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3992   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3993   emit_int16((unsigned char)0xD7, (0xC0 | encode));
3994 }
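
// PMOVMSKB gathers the most significant bit of each byte of src into the low
// bits of dst (16 bits for the xmm form, 32 bits for the ymm form), zeroing
// the rest. Usage sketch (hypothetical registers):
//   pmovmskb(rcx, xmm0);   // rcx = byte-sign bitmap of xmm0
//   testl(rcx, rcx);       // any byte flagged?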
3995 
3996 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3997   assert(VM_Version::supports_sse4_1(), "");
3998   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3999   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4000   emit_int24(0x16, (0xC0 | encode), imm8);
4001 }
4002 
4003 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
4004   assert(VM_Version::supports_sse4_1(), "");
4005   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4006   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4007   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4008   emit_int8(0x16);
4009   emit_operand(src, dst);
4010   emit_int8(imm8);
4011 }
4012 
4013 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
4014   assert(VM_Version::supports_sse4_1(), "");
4015   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4016   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4017   emit_int24(0x16, (0xC0 | encode), imm8);
4018 }
4019 
4020 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
4021   assert(VM_Version::supports_sse4_1(), "");
4022   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4023   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4024   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4025   emit_int8(0x16);
4026   emit_operand(src, dst);
4027   emit_int8(imm8);
4028 }
4029 
4030 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
4031   assert(VM_Version::supports_sse2(), "");
4032   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4033   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4034   emit_int24((unsigned char)0xC5, (0xC0 | encode), imm8);
4035 }
4036 
4037 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
4038   assert(VM_Version::supports_sse4_1(), "");
4039   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4040   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
4041   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4042   emit_int8(0x15);
4043   emit_operand(src, dst);
4044   emit_int8(imm8);
4045 }
4046 
4047 void Assembler::pextrb(Register dst, XMMRegister src, int imm8) {
4048   assert(VM_Version::supports_sse4_1(), "");
4049   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4050   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4051   emit_int24(0x14, (0xC0 | encode), imm8);
4052 }
4053 
4054 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
4055   assert(VM_Version::supports_sse4_1(), "");
4056   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4057   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
4058   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4059   emit_int8(0x14);
4060   emit_operand(src, dst);
4061   emit_int8(imm8);
4062 }
4063 
4064 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
4065   assert(VM_Version::supports_sse4_1(), "");
4066   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4067   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4068   emit_int24(0x22, (0xC0 | encode), imm8);
4069 }
4070 
4071 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
4072   assert(VM_Version::supports_sse4_1(), "");
4073   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4074   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4075   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4076   emit_int8(0x22);
4077   emit_operand(dst, src);
4078   emit_int8(imm8);
4079 }
4080 
4081 void Assembler::vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
4082   assert(VM_Version::supports_avx(), "");
4083   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4084   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4085   emit_int24(0x22, (0xC0 | encode), imm8);
4086 }
4087 
4088 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
4089   assert(VM_Version::supports_sse4_1(), "");
4090   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4091   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4092   emit_int24(0x22, (0xC0 | encode), imm8);
4093 }
4094 
4095 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
4096   assert(VM_Version::supports_sse4_1(), "");
4097   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4098   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4099   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4100   emit_int8(0x22);
4101   emit_operand(dst, src);
4102   emit_int8(imm8);
4103 }
4104 
4105 void Assembler::vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
4106   assert(VM_Version::supports_avx(), "");
4107   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4108   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4109   emit_int24(0x22, (0xC0 | encode), imm8);
4110 }
4111 
4112 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
4113   assert(VM_Version::supports_sse2(), "");
4114   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4115   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4116   emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8);
4117 }
4118 
4119 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
4120   assert(VM_Version::supports_sse2(), "");
4121   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4122   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
4123   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4124   emit_int8((unsigned char)0xC4);
4125   emit_operand(dst, src);
4126   emit_int8(imm8);
4127 }
4128 
4129 void Assembler::vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
4130   assert(VM_Version::supports_avx(), "");
4131   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4132   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4133   emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8);
4134 }
4135 
4136 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
4137   assert(VM_Version::supports_sse4_1(), "");
4138   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4139   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
4140   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4141   emit_int8(0x20);
4142   emit_operand(dst, src);
4143   emit_int8(imm8);
4144 }
4145 
4146 void Assembler::pinsrb(XMMRegister dst, Register src, int imm8) {
4147   assert(VM_Version::supports_sse4_1(), "");
4148   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4149   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4150   emit_int24(0x20, (0xC0 | encode), imm8);
4151 }
4152 
4153 void Assembler::vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
4154   assert(VM_Version::supports_avx(), "");
4155   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4156   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4157   emit_int24(0x20, (0xC0 | encode), imm8);
4158 }
4159 
4160 void Assembler::insertps(XMMRegister dst, XMMRegister src, int imm8) {
4161   assert(VM_Version::supports_sse4_1(), "");
4162   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4163   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4164   emit_int24(0x21, (0xC0 | encode), imm8);
4165 }
4166 
4167 void Assembler::vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
4168   assert(VM_Version::supports_avx(), "");
4169   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4170   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4171   emit_int24(0x21, (0xC0 | encode), imm8);
4172 }
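
// INSERTPS imm8 layout (per the Intel SDM): bits [7:6] select the source
// element, bits [5:4] the destination slot, bits [3:0] zero-mask destination
// elements. Usage sketch (hypothetical registers):
//   insertps(xmm0, xmm1, 0x10);  // xmm0[1] = xmm1[0], other elements kept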
4173 
4174 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
4175   assert(VM_Version::supports_sse4_1(), "");
4176   InstructionMark im(this);
4177   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4178   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4179   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4180   emit_int8(0x30);
4181   emit_operand(dst, src);
4182 }
4183 
4184 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
4185   assert(VM_Version::supports_sse4_1(), "");
4186   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4187   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4188   emit_int16(0x30, (0xC0 | encode));
4189 }
4190 
4191 void Assembler::pmovsxbw(XMMRegister dst, XMMRegister src) {
4192   assert(VM_Version::supports_sse4_1(), "");
4193   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4194   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4195   emit_int16(0x20, (0xC0 | encode));
4196 }
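
// The two widening byte-to-word moves differ only in the extension. Sketch
// (hypothetical registers), with the low 8 bytes of xmm1 = 0x80, 0x7F, ...:
//   pmovzxbw(xmm0, xmm1);  // words 0x0080, 0x007F, ... (zero-extend)
//   pmovsxbw(xmm0, xmm1);  // words 0xFF80, 0x007F, ... (sign-extend)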
4197 
4198 void Assembler::pmovzxdq(XMMRegister dst, XMMRegister src) {
4199   assert(VM_Version::supports_sse4_1(), "");
4200   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4201   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4202   emit_int16(0x35, (0xC0 | encode));
4203 }
4204 
4205 void Assembler::pmovsxbd(XMMRegister dst, XMMRegister src) {
4206   assert(VM_Version::supports_sse4_1(), "");
4207   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4208   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4209   emit_int16(0x21, (0xC0 | encode));
4210 }
4211 
4212 void Assembler::pmovzxbd(XMMRegister dst, XMMRegister src) {
4213   assert(VM_Version::supports_sse4_1(), "");
4214   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4215   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4216   emit_int16(0x31, (0xC0 | encode));
4217 }
4218 
4219 void Assembler::pmovsxbq(XMMRegister dst, XMMRegister src) {
4220   assert(VM_Version::supports_sse4_1(), "");
4221   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4222   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4223   emit_int16(0x22, (0xC0 | encode));
4224 }
4225 
4226 void Assembler::pmovsxwd(XMMRegister dst, XMMRegister src) {
4227   assert(VM_Version::supports_sse4_1(), "");
4228   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4229   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4230   emit_int16(0x23, (0xC0 | encode));
4231 }
4232 
4233 void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
4234   assert(VM_Version::supports_avx(), "");
4235   InstructionMark im(this);
4236   assert(dst != xnoreg, "sanity");
4237   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4238   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4239   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4240   emit_int8(0x30);
4241   emit_operand(dst, src);
4242 }
4243 
4244 void Assembler::vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) {
4245   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4246          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4247          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : 0, "");
4248   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4249   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4250   emit_int16(0x30, (0xC0 | encode));
4251 }
4252 
4253 void Assembler::vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len) {
4254   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4255          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4256          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : 0, "");
4257   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4258   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4259   emit_int16(0x20, (0xC0 | encode));
4260 }
4261 
4262 void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
4263   assert(VM_Version::supports_avx512vlbw(), "");
4264   assert(dst != xnoreg, "sanity");
4265   InstructionMark im(this);
4266   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
4267   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4268   attributes.set_embedded_opmask_register_specifier(mask);
4269   attributes.set_is_evex_instruction();
4270   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4271   emit_int8(0x30);
4272   emit_operand(dst, src);
4273 }
4274 
4275 void Assembler::evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
4276   assert(VM_Version::supports_evex(), "");
4277   // Encoding: EVEX.NDS.XXX.66.0F.W0 DB /r
4278   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4279   attributes.set_is_evex_instruction();
4280   attributes.set_embedded_opmask_register_specifier(mask);
4281   if (merge) {
4282     attributes.reset_is_clear_context();
4283   }
4284   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4285   emit_int16((unsigned char)0xDB, (0xC0 | encode));
4286 }
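
// Note on the merge parameter: the EVEX write-mask defaults to zeroing
// semantics here (EVEX.z = 1), so unselected components of dst are cleared;
// reset_is_clear_context() flips to merge-masking (EVEX.z = 0), which leaves
// unselected components of dst unchanged.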
4287 
4288 void Assembler::vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len) {
4289   assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
4290   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4291   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4292   emit_int16(0x35, (0xC0 | encode));
4293 }
4294 
4295 void Assembler::vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len) {
4296   assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
4297   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4298   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4299   emit_int16(0x31, (0xC0 | encode));
4300 }
4301 
4302 void Assembler::vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len) {
4303   assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
4304   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4305   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4306   emit_int16(0x32, (0xC0 | encode));
4307 }
4308 
4309 void Assembler::vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len) {
4310   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4311          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4312              VM_Version::supports_evex(), "");
4313   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4314   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4315   emit_int16(0x21, (0xC0 | encode));
4316 }
4317 
4318 void Assembler::vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len) {
4319   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4320          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4321              VM_Version::supports_evex(), "");
4322   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4323   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4324   emit_int16(0x22, (0xC0 | encode));
4325 }
4326 
4327 void Assembler::vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len) {
4328   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4329          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4330              VM_Version::supports_evex(), "");
4331   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4332   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4333   emit_int16(0x23, (0xC0 | encode));
4334 }
4335 
4336 void Assembler::vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len) {
4337   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4338          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4339              VM_Version::supports_evex(), "");
4340   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4341   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4342   emit_int16(0x24, (0xC0 | encode));
4343 }
4344 
4345 void Assembler::vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len) {
4346   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4347          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4348              VM_Version::supports_evex(), "");
4349   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4350   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4351   emit_int16(0x25, (0xC0 | encode));
4352 }
4353 
4354 void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
4355   assert(VM_Version::supports_avx512vlbw(), "");
4356   assert(src != xnoreg, "sanity");
4357   InstructionMark im(this);
4358   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4359   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4360   attributes.set_is_evex_instruction();
4361   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
4362   emit_int8(0x30);
4363   emit_operand(src, dst);
4364 }
4365 
4366 void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
4367   assert(VM_Version::supports_avx512vlbw(), "");
4368   assert(src != xnoreg, "sanity");
4369   InstructionMark im(this);
4370   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4371   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4372   attributes.reset_is_clear_context();
4373   attributes.set_embedded_opmask_register_specifier(mask);
4374   attributes.set_is_evex_instruction();
4375   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
4376   emit_int8(0x30);
4377   emit_operand(src, dst);
4378 }
4379 
4380 void Assembler::evpmovdb(Address dst, XMMRegister src, int vector_len) {
4381   assert(VM_Version::supports_evex(), "");
4382   assert(src != xnoreg, "sanity");
4383   InstructionMark im(this);
4384   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4385   attributes.set_address_attributes(/* tuple_type */ EVEX_QVM, /* input_size_in_bits */ EVEX_NObit);
4386   attributes.set_is_evex_instruction();
4387   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
4388   emit_int8(0x31);
4389   emit_operand(src, dst);
4390 }
4391 
4392 void Assembler::vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len) {
4393   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4394          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4395          vector_len == AVX_512bit ? VM_Version::supports_evex() : 0, "");
4396   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4397   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4398   emit_int16(0x33, (0xC0 | encode));
4399 }
4400 
4401 void Assembler::pmaddwd(XMMRegister dst, XMMRegister src) {
4402   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4403   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4404   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4405   emit_int16((unsigned char)0xF5, (0xC0 | encode));
4406 }
4407 
4408 void Assembler::vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4409   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4410     (vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4411     (vector_len == AVX_512bit ? VM_Version::supports_evex() : 0)), "");
4412   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4413   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4414   emit_int16((unsigned char)0xF5, (0xC0 | encode));
4415 }
4416 
4417 void Assembler::evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4418   assert(VM_Version::supports_evex(), "");
4419   assert(VM_Version::supports_avx512_vnni(), "must support vnni");
4420   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4421   attributes.set_is_evex_instruction();
4422   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4423   emit_int16(0x52, (0xC0 | encode));
4424 }
4425 
4426 // generic
4427 void Assembler::pop(Register dst) {
4428   int encode = prefix_and_encode(dst->encoding());
4429   emit_int8(0x58 | encode);
4430 }
4431 
4432 void Assembler::popcntl(Register dst, Address src) {
4433   assert(VM_Version::supports_popcnt(), "must support");
4434   InstructionMark im(this);
4435   emit_int8((unsigned char)0xF3);
4436   prefix(src, dst);
4437   emit_int16(0x0F, (unsigned char)0xB8);
4438   emit_operand(dst, src);
4439 }
4440 
4441 void Assembler::popcntl(Register dst, Register src) {
4442   assert(VM_Version::supports_popcnt(), "must support");
4443   emit_int8((unsigned char)0xF3);
4444   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4445   emit_int24(0x0F, (unsigned char)0xB8, (0xC0 | encode));
4446 }
4447 
4448 void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
4449   assert(VM_Version::supports_avx512_vpopcntdq(), "must support vpopcntdq feature");
4450   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4451   attributes.set_is_evex_instruction();
4452   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4453   emit_int16(0x55, (0xC0 | encode));
4454 }
4455 
4456 void Assembler::popf() {
4457   emit_int8((unsigned char)0x9D);
4458 }
4459 
4460 #ifndef _LP64 // no 32bit push/pop on amd64
4461 void Assembler::popl(Address dst) {
4462   // NOTE: on 64-bit this would adjust the stack by 8 bytes, hence this form is 32-bit only
4463   InstructionMark im(this);
4464   prefix(dst);
4465   emit_int8((unsigned char)0x8F);
4466   emit_operand(rax, dst);
4467 }
4468 #endif
4469 
4470 void Assembler::prefetchnta(Address src) {
4471   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4472   InstructionMark im(this);
4473   prefix(src);
4474   emit_int16(0x0F, 0x18);
4475   emit_operand(rax, src); // 0, src
4476 }
4477 
4478 void Assembler::prefetchr(Address src) {
4479   assert(VM_Version::supports_3dnow_prefetch(), "must support");
4480   InstructionMark im(this);
4481   prefix(src);
4482   emit_int16(0x0F, 0x0D);
4483   emit_operand(rax, src); // 0, src
4484 }
4485 
4486 void Assembler::prefetcht0(Address src) {
4487   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4488   InstructionMark im(this);
4489   prefix(src);
4490   emit_int16(0x0F, 0x18);
4491   emit_operand(rcx, src); // 1, src
4492 }
4493 
4494 void Assembler::prefetcht1(Address src) {
4495   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4496   InstructionMark im(this);
4497   prefix(src);
4498   emit_int16(0x0F, 0x18);
4499   emit_operand(rdx, src); // 2, src
4500 }
4501 
4502 void Assembler::prefetcht2(Address src) {
4503   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4504   InstructionMark im(this);
4505   prefix(src);
4506   emit_int16(0x0F, 0x18);
4507   emit_operand(rbx, src); // 3, src
4508 }
4509 
4510 void Assembler::prefetchw(Address src) {
4511   assert(VM_Version::supports_3dnow_prefetch(), "must support");
4512   InstructionMark im(this);
4513   prefix(src);
4514   emit_int16(0x0F, 0x0D);
4515   emit_operand(rcx, src); // 1, src
4516 }
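
// In the prefetch forms above the "register" operand only supplies the ModRM
// reg field: 0F 18 /0../3 select prefetchnta/t0/t1/t2, while the 3DNow!
// 0F 0D encoding uses /0 for prefetchr and /1 for prefetchw; the rax..rbx
// arguments to emit_operand() are just those digits.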
4517 
4518 void Assembler::prefix(Prefix p) {
4519   emit_int8(p);
4520 }
4521 
4522 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
4523   assert(VM_Version::supports_ssse3(), "");
4524   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4525   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4526   emit_int16(0x00, (0xC0 | encode));
4527 }
4528 
4529 void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4530   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4531          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4532          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : 0, "");
4533   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4534   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4535   emit_int16(0x00, (0xC0 | encode));
4536 }
4537 
4538 void Assembler::pshufb(XMMRegister dst, Address src) {
4539   assert(VM_Version::supports_ssse3(), "");
4540   InstructionMark im(this);
4541   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4542   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4543   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4544   emit_int8(0x00);
4545   emit_operand(dst, src);
4546 }
4547 
4548 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
4549   assert(isByte(mode), "invalid value");
4550   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4551   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
4552   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4553   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4554   emit_int24(0x70, (0xC0 | encode), mode & 0xFF);
4555 }
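
// The mode byte holds four 2-bit selectors: dst dword i comes from src dword
// (mode >> 2*i) & 3. Usage sketch (hypothetical registers):
//   pshufd(xmm0, xmm1, 0xE4);  // 11 10 01 00: identity copy
//   pshufd(xmm0, xmm1, 0x1B);  // 00 01 10 11: reverse the four dwords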
4556 
4557 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
4558   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4559          (vector_len == AVX_256bit? VM_Version::supports_avx2() :
4560          (vector_len == AVX_512bit? VM_Version::supports_evex() : 0)), "");
4561   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4562   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4563   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4564   emit_int24(0x70, (0xC0 | encode), mode & 0xFF);
4565 }
4566 
4567 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
4568   assert(isByte(mode), "invalid value");
4569   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4570   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4571   InstructionMark im(this);
4572   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4573   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4574   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4575   emit_int8(0x70);
4576   emit_operand(dst, src);
4577   emit_int8(mode & 0xFF);
4578 }
4579 
4580 void Assembler::pshufhw(XMMRegister dst, XMMRegister src, int mode) {
4581   assert(isByte(mode), "invalid value");
4582   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4583   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4584   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4585   emit_int24(0x70, (0xC0 | encode), mode & 0xFF);
4586 }
4587 
4588 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
4589   assert(isByte(mode), "invalid value");
4590   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4591   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4592   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4593   emit_int24(0x70, (0xC0 | encode), mode & 0xFF);
4594 }
4595 
4596 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
4597   assert(isByte(mode), "invalid value");
4598   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4599   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4600   InstructionMark im(this);
4601   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4602   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4603   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4604   emit_int8(0x70);
4605   emit_operand(dst, src);
4606   emit_int8(mode & 0xFF);
4607 }
4608 
4609 void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4610   assert(VM_Version::supports_evex(), "requires EVEX support");
4611   assert(vector_len == Assembler::AVX_256bit || vector_len == Assembler::AVX_512bit, "");
4612   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4613   attributes.set_is_evex_instruction();
4614   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4615   emit_int24(0x43, (0xC0 | encode), imm8 & 0xFF);
4616 }
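
// VSHUFI64X2 shuffles whole 128-bit lanes: the low half of dst selects lanes
// from nds and the high half from src, with imm8 holding one selector per
// destination lane (2 bits each at 512 bits). Sketch (hypothetical
// registers): evshufi64x2(xmm0, xmm1, xmm2, 0, Assembler::AVX_512bit)
// broadcasts lane 0 of each source.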
4617 
4618 void Assembler::pshufpd(XMMRegister dst, XMMRegister src, int imm8) {
4619   assert(isByte(imm8), "invalid value");
4620   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4621   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4622   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4623   emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
4624 }
4625 
4626 void Assembler::vpshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4627   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4628   attributes.set_rex_vex_w_reverted();
4629   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4630   emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
4631 }
4632 
4633 void Assembler::pshufps(XMMRegister dst, XMMRegister src, int imm8) {
4634   assert(isByte(imm8), "invalid value");
4635   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4636   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4637   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4638   emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
4639 }
4640 
4641 void Assembler::vpshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4642   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4643   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4644   emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
4645 }
4646 
4647 void Assembler::psrldq(XMMRegister dst, int shift) {
4648   // Shift right 128 bit value in dst XMMRegister by shift number of bytes (XMM3 supplies the /3 opcode extension: 66 0F 73 /3 ib).
4649   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4650   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4651   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4652   emit_int24(0x73, (0xC0 | encode), shift);
4653 }
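
// Usage sketch (hypothetical register): psrldq(xmm0, 8) moves the high
// quadword of xmm0 into the low quadword and zero-fills the upper half.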
4654 
4655 void Assembler::vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4656   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4657          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4658          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : 0, "");
4659   InstructionAttr attributes(vector_len, /*vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4660   int encode = vex_prefix_and_encode(xmm3->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4661   emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
4662 }
4663 
4664 void Assembler::pslldq(XMMRegister dst, int shift) {
4665   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
4666   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4667   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4668   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
4669   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4670   emit_int24(0x73, (0xC0 | encode), shift);
4671 }
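
// Usage sketch (hypothetical register): pslldq(xmm0, 8) moves the low
// quadword of xmm0 into the high quadword and zero-fills the lower half.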
4672 
4673 void Assembler::vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4674   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4675          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4676          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : 0, "");
4677   InstructionAttr attributes(vector_len, /*vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4678   int encode = vex_prefix_and_encode(xmm7->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4679   emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
4680 }
4681 
4682 void Assembler::ptest(XMMRegister dst, Address src) {
4683   assert(VM_Version::supports_sse4_1(), "");
4684   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4685   InstructionMark im(this);
4686   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4687   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4688   emit_int8(0x17);
4689   emit_operand(dst, src);
4690 }
4691 
4692 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
4693   assert(VM_Version::supports_sse4_1() || VM_Version::supports_avx(), "");
4694   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4695   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4696   emit_int8(0x17);
4697   emit_int8((0xC0 | encode));
4698 }
4699 
4700 void Assembler::vptest(XMMRegister dst, Address src) {
4701   assert(VM_Version::supports_avx(), "");
4702   InstructionMark im(this);
4703   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4704   assert(dst != xnoreg, "sanity");
4705   // swap src<->dst for encoding
4706   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4707   emit_int8(0x17);
4708   emit_operand(dst, src);
4709 }
4710 
4711 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
4712   assert(VM_Version::supports_avx(), "");
4713   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4714   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4715   emit_int16(0x17, (0xC0 | encode));
4716 }
4717 
4718 void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) {
4719   assert(VM_Version::supports_avx(), "");
4720   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4721   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4722   emit_int16(0x17, (0xC0 | encode));
4723 }
4724 
4725 void Assembler::punpcklbw(XMMRegister dst, Address src) {
4726   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4727   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4728   InstructionMark im(this);
4729   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ true, /* uses_vl */ true);
4730   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4731   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4732   emit_int8(0x60);
4733   emit_operand(dst, src);
4734 }
4735 
4736 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
4737   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4738   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ true, /* uses_vl */ true);
4739   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4740   emit_int16(0x60, (0xC0 | encode));
4741 }
4742 
4743 void Assembler::punpckldq(XMMRegister dst, Address src) {
4744   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4745   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4746   InstructionMark im(this);
4747   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4748   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4749   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4750   emit_int8(0x62);
4751   emit_operand(dst, src);
4752 }
4753 
4754 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
4755   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4756   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4757   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4758   emit_int16(0x62, (0xC0 | encode));
4759 }
4760 
4761 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
4762   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4763   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4764   attributes.set_rex_vex_w_reverted();
4765   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4766   emit_int16(0x6C, (0xC0 | encode));
4767 }
4768 
4769 void Assembler::push(int32_t imm32) {
4770   // In 64-bit mode this pushes eight bytes onto the stack, but the
4771   // instruction only takes a 32-bit (sign-extended) immediate
4772   emit_int8(0x68);
4773   emit_int32(imm32);
4774 }
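
// E.g. push(0x12345678) emits 68 78 56 34 12; on 64-bit the CPU
// sign-extends the immediate and rsp drops by 8.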
4775 
4776 void Assembler::push(Register src) {
4777   int encode = prefix_and_encode(src->encoding());
4778   emit_int8(0x50 | encode);
4779 }
4780 
4781 void Assembler::pushf() {
4782   emit_int8((unsigned char)0x9C);
4783 }
4784 
4785 #ifndef _LP64 // no 32bit push/pop on amd64
4786 void Assembler::pushl(Address src) {
4787   // Note: on a 64-bit target this would push 64 bits, hence this form is 32-bit only
4788   InstructionMark im(this);
4789   prefix(src);
4790   emit_int8((unsigned char)0xFF);
4791   emit_operand(rsi, src);
4792 }
4793 #endif
4794 
4795 void Assembler::rcll(Register dst, int imm8) {
4796   assert(isShiftCount(imm8), "illegal shift count");
4797   int encode = prefix_and_encode(dst->encoding());
4798   if (imm8 == 1) {
4799     emit_int16((unsigned char)0xD1, (0xD0 | encode));
4800   } else {
4801     emit_int24((unsigned char)0xC1, (0xD0 | encode), imm8);
4802   }
4803 }
4804 
4805 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
4806   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4807   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4808   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4809   emit_int16(0x53, (0xC0 | encode));
4810 }
4811 
4812 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
4813   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4814   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4815   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4816   emit_int16(0x53, (0xC0 | encode));
4817 }
4818 
4819 void Assembler::rdtsc() {
4820   emit_int16(0x0F, 0x31);
4821 }
4822 
4823 // copies rcx pointer-sized words from [esi] to [edi]
4824 // generic
4825 void Assembler::rep_mov() {
4826   // REP
4827   // MOVSQ
4828   LP64_ONLY(emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xA5);)
4829   NOT_LP64( emit_int16((unsigned char)0xF3,        (unsigned char)0xA5);)
4830 }
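
// e.g. on 64-bit this expands to F3 48 A5 (rep movsq), copying eight bytes
// per iteration until rcx reaches zero.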
4831 
4832 // fills rcx bytes at [edi] with the value in al (the low byte of rax)
4833 void Assembler::rep_stosb() {
4834   // REP
4835   // STOSB
4836   LP64_ONLY(emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xAA);)
4837   NOT_LP64( emit_int16((unsigned char)0xF3,        (unsigned char)0xAA);)
4838 }
4839 
4840 // fills rcx pointer-sized words at [edi] with the value in rax
4841 // generic
4842 void Assembler::rep_stos() {
4843   // REP
4844   // LP64:STOSQ, LP32:STOSD
4845   LP64_ONLY(emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xAB);)
4846   NOT_LP64( emit_int16((unsigned char)0xF3,        (unsigned char)0xAB);)
4847 }
4848 
4849 // scans rcx pointer-sized words at [edi] for an occurrence of rax
4850 // generic
4851 void Assembler::repne_scan() {
4852   // SCASQ
4853   LP64_ONLY(emit_int24((unsigned char)0xF2, REX_W, (unsigned char)0xAF);)
4854   NOT_LP64( emit_int16((unsigned char)0xF2,        (unsigned char)0xAF);)
4855 }
4856 
4857 #ifdef _LP64
4858 // scans rcx 4-byte words at [edi] for an occurrence of rax
4859 // generic
4860 void Assembler::repne_scanl() {
4861   // SCASL
4862   emit_int16((unsigned char)0xF2, (unsigned char)0xAF);
4863 }
4864 #endif
4865 
4866 void Assembler::ret(int imm16) {
4867   if (imm16 == 0) {
4868     emit_int8((unsigned char)0xC3);
4869   } else {
4870     emit_int8((unsigned char)0xC2);
4871     emit_int16(imm16);
4872   }
4873 }
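
// For illustration: ret(0) emits the one-byte C3, while ret(16) emits
// C2 10 00 and additionally pops 16 bytes of arguments after loading the
// return address.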
4874 
4875 void Assembler::sahf() {
4876 #ifdef _LP64
4877   // Not supported in 64bit mode
4878   ShouldNotReachHere();
4879 #endif
4880   emit_int8((unsigned char)0x9E);
4881 }
4882 
4883 void Assembler::sarl(Register dst, int imm8) {
4884   int encode = prefix_and_encode(dst->encoding());
4885   assert(isShiftCount(imm8), "illegal shift count");
4886   if (imm8 == 1) {
4887     emit_int16((unsigned char)0xD1, (0xF8 | encode));
4888   } else {
4889     emit_int24((unsigned char)0xC1, (0xF8 | encode), imm8);
4890   }
4891 }
4892 
4893 void Assembler::sarl(Register dst) {
4894   int encode = prefix_and_encode(dst->encoding());
4895   emit_int16((unsigned char)0xD3, (0xF8 | encode));
4896 }
4897 
4898 void Assembler::sbbl(Address dst, int32_t imm32) {
4899   InstructionMark im(this);
4900   prefix(dst);
4901   emit_arith_operand(0x81, rbx, dst, imm32);
4902 }
4903 
4904 void Assembler::sbbl(Register dst, int32_t imm32) {
4905   prefix(dst);
4906   emit_arith(0x81, 0xD8, dst, imm32);
4907 }
4908 
4909 
4910 void Assembler::sbbl(Register dst, Address src) {
4911   InstructionMark im(this);
4912   prefix(src, dst);
4913   emit_int8(0x1B);
4914   emit_operand(dst, src);
4915 }
4916 
4917 void Assembler::sbbl(Register dst, Register src) {
4918   (void) prefix_and_encode(dst->encoding(), src->encoding());
4919   emit_arith(0x1B, 0xC0, dst, src);
4920 }
4921 
4922 void Assembler::setb(Condition cc, Register dst) {
4923   assert(0 <= cc && cc < 16, "illegal cc");
4924   int encode = prefix_and_encode(dst->encoding(), true);
4925   emit_int24(0x0F, (unsigned char)0x90 | cc, (0xC0 | encode));
4926 }
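
// e.g. setb(Assembler::zero, rax) emits 0F 94 C0 (sete al); only the low
// byte of dst is written, so callers typically zero-extend afterwards.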
4927 
4928 void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
4929   assert(VM_Version::supports_ssse3(), "");
4930   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4931   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4932   emit_int24(0x0F, (0xC0 | encode), imm8);
4933 }
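
// e.g. palignr(dst, src, 8) shifts the 32-byte concatenation dst:src right
// by eight bytes and keeps the low 16: the result's low eight bytes come
// from src bytes 15:8 and its high eight bytes from dst bytes 7:0.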
4934 
4935 void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4936   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4937          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4938          0, "");
4939   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4940   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4941   emit_int24(0x0F, (0xC0 | encode), imm8);
4942 }
4943 
4944 void Assembler::evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
4945   assert(VM_Version::supports_evex(), "");
4946   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4947   attributes.set_is_evex_instruction();
4948   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4949   emit_int24(0x3, (0xC0 | encode), imm8);
4950 }
4951 
4952 void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
4953   assert(VM_Version::supports_sse4_1(), "");
4954   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4955   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4956   emit_int24(0x0E, (0xC0 | encode), imm8);
4957 }
4958 
4959 void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
4960   assert(VM_Version::supports_sha(), "");
4961   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
4962   emit_int24((unsigned char)0xCC, (0xC0 | encode), (unsigned char)imm8);
4963 }
4964 
4965 void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
4966   assert(VM_Version::supports_sha(), "");
4967   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4968   emit_int16((unsigned char)0xC8, (0xC0 | encode));
4969 }
4970 
4971 void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
4972   assert(VM_Version::supports_sha(), "");
4973   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4974   emit_int16((unsigned char)0xC9, (0xC0 | encode));
4975 }
4976 
4977 void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
4978   assert(VM_Version::supports_sha(), "");
4979   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4980   emit_int16((unsigned char)0xCA, (0xC0 | encode));
4981 }
4982 
4983 // xmm0 is an implicit additional source for this instruction.
4984 void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
4985   assert(VM_Version::supports_sha(), "");
4986   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4987   emit_int16((unsigned char)0xCB, (0xC0 | encode));
4988 }
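
// A hedged usage sketch (register roles illustrative, not from this file):
// the caller stages the current message+constant words in xmm0 before each
// call, e.g.
//   movdqu(xmm0, Address(rbx, 0));   // wk[0], wk[1]
//   sha256rnds2(state1, state0);     // two rounds; xmm0 read implicitly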
4989 
4990 void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
4991   assert(VM_Version::supports_sha(), "");
4992   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4993   emit_int16((unsigned char)0xCC, (0xC0 | encode));
4994 }
4995 
4996 void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
4997   assert(VM_Version::supports_sha(), "");
4998   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4999   emit_int16((unsigned char)0xCD, (0xC0 | encode));
5000 }
5001 
5002 
5003 void Assembler::shll(Register dst, int imm8) {
5004   assert(isShiftCount(imm8), "illegal shift count");
5005   int encode = prefix_and_encode(dst->encoding());
5006   if (imm8 == 1) {
5007     emit_int16((unsigned char)0xD1, (0xE0 | encode));
5008   } else {
5009     emit_int24((unsigned char)0xC1, (0xE0 | encode), imm8);
5010   }
5011 }
5012 
5013 void Assembler::shll(Register dst) {
5014   int encode = prefix_and_encode(dst->encoding());
5015   emit_int16((unsigned char)0xD3, (0xE0 | encode));
5016 }
5017 
5018 void Assembler::shrl(Register dst, int imm8) {
5019   assert(isShiftCount(imm8), "illegal shift count");
5020   int encode = prefix_and_encode(dst->encoding());
5021   emit_int24((unsigned char)0xC1, (0xE8 | encode), imm8);
5022 }
5023 
5024 void Assembler::shrl(Register dst) {
5025   int encode = prefix_and_encode(dst->encoding());
5026   emit_int16((unsigned char)0xD3, (0xE8 | encode));
5027 }
5028 
5029 void Assembler::shldl(Register dst, Register src) {
5030   int encode = prefix_and_encode(src->encoding(), dst->encoding());
5031   emit_int24(0x0F, (unsigned char)0xA5, (0xC0 | encode));
5032 }
5033 
5034 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
5035   int encode = prefix_and_encode(src->encoding(), dst->encoding());
5036   emit_int32(0x0F, (unsigned char)0xA4, (0xC0 | encode), imm8);
5037 }
5038 
5039 void Assembler::shrdl(Register dst, Register src) {
5040   int encode = prefix_and_encode(src->encoding(), dst->encoding());
5041   emit_int24(0x0F, (unsigned char)0xAD, (0xC0 | encode));
5042 }
5043 
5044 void Assembler::shrdl(Register dst, Register src, int8_t imm8) {
5045   int encode = prefix_and_encode(src->encoding(), dst->encoding());
5046   emit_int32(0x0F, (unsigned char)0xAC, (0xC0 | encode), imm8);
5047 }
5048 
5049 // copies a single 32-bit word from [esi] to [edi]
5050 void Assembler::smovl() {
5051   emit_int8((unsigned char)0xA5);
5052 }
5053 
5054 void Assembler::roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) {
5055   assert(VM_Version::supports_sse4_1(), "");
5056   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5057   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5058   emit_int24(0x0B, (0xC0 | encode), (unsigned char)rmode);
5059 }
5060 
5061 void Assembler::roundsd(XMMRegister dst, Address src, int32_t rmode) {
5062   assert(VM_Version::supports_sse4_1(), "");
5063   InstructionMark im(this);
5064   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5065   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5066   emit_int8(0x0B);
5067   emit_operand(dst, src);
5068   emit_int8((unsigned char)rmode);
5069 }
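
// In the rmode immediate, bits 1:0 pick the rounding mode (0 nearest,
// 1 down, 2 up, 3 truncate), bit 2 defers to MXCSR.RC instead, and bit 3
// suppresses the precision exception; e.g. rmode 0x3 truncates toward zero.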
5070 
5071 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
5072   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5073   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5074   attributes.set_rex_vex_w_reverted();
5075   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5076   emit_int16(0x51, (0xC0 | encode));
5077 }
5078 
5079 void Assembler::sqrtsd(XMMRegister dst, Address src) {
5080   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5081   InstructionMark im(this);
5082   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5083   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5084   attributes.set_rex_vex_w_reverted();
5085   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5086   emit_int8(0x51);
5087   emit_operand(dst, src);
5088 }
5089 
5090 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
5091   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5092   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5093   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5094   emit_int16(0x51, (0xC0 | encode));
5095 }
5096 
5097 void Assembler::std() {
5098   emit_int8((unsigned char)0xFD);
5099 }
5100 
5101 void Assembler::sqrtss(XMMRegister dst, Address src) {
5102   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5103   InstructionMark im(this);
5104   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5105   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5106   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5107   emit_int8(0x51);
5108   emit_operand(dst, src);
5109 }
5110 
5111 void Assembler::stmxcsr(Address dst) {
5112   if (UseAVX > 0) {
5113     assert(VM_Version::supports_avx(), "");
5114     InstructionMark im(this);
5115     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5116     vex_prefix(dst, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5117     emit_int8((unsigned char)0xAE);
5118     emit_operand(as_Register(3), dst);
5119   } else {
5120     NOT_LP64(assert(VM_Version::supports_sse(), ""));
5121     InstructionMark im(this);
5122     prefix(dst);
5123     emit_int16(0x0F, (unsigned char)0xAE);
5124     emit_operand(as_Register(3), dst);
5125   }
5126 }
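
// e.g. stmxcsr(Address(rsp, 0)) stores the 32-bit MXCSR control/status word
// to the stack; under AVX the VEX-encoded vstmxcsr form is emitted instead.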
5127 
5128 void Assembler::subl(Address dst, int32_t imm32) {
5129   InstructionMark im(this);
5130   prefix(dst);
5131   emit_arith_operand(0x81, rbp, dst, imm32);
5132 }
5133 
5134 void Assembler::subl(Address dst, Register src) {
5135   InstructionMark im(this);
5136   prefix(dst, src);
5137   emit_int8(0x29);
5138   emit_operand(src, dst);
5139 }
5140 
5141 void Assembler::subl(Register dst, int32_t imm32) {
5142   prefix(dst);
5143   emit_arith(0x81, 0xE8, dst, imm32);
5144 }
5145 
5146 // Force generation of a 4-byte immediate value even if it fits into 8 bits
5147 void Assembler::subl_imm32(Register dst, int32_t imm32) {
5148   prefix(dst);
5149   emit_arith_imm32(0x81, 0xE8, dst, imm32);
5150 }
5151 
5152 void Assembler::subl(Register dst, Address src) {
5153   InstructionMark im(this);
5154   prefix(src, dst);
5155   emit_int8(0x2B);
5156   emit_operand(dst, src);
5157 }
5158 
5159 void Assembler::subl(Register dst, Register src) {
5160   (void) prefix_and_encode(dst->encoding(), src->encoding());
5161   emit_arith(0x2B, 0xC0, dst, src);
5162 }
5163 
5164 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
5165   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5166   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5167   attributes.set_rex_vex_w_reverted();
5168   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5169   emit_int16(0x5C, (0xC0 | encode));
5170 }
5171 
5172 void Assembler::subsd(XMMRegister dst, Address src) {
5173   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5174   InstructionMark im(this);
5175   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5176   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5177   attributes.set_rex_vex_w_reverted();
5178   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5179   emit_int8(0x5C);
5180   emit_operand(dst, src);
5181 }
5182 
5183 void Assembler::subss(XMMRegister dst, XMMRegister src) {
5184   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5185   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true , /* uses_vl */ false);
5186   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5187   emit_int16(0x5C, (0xC0 | encode));
5188 }
5189 
5190 void Assembler::subss(XMMRegister dst, Address src) {
5191   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5192   InstructionMark im(this);
5193   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5194   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5195   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5196   emit_int8(0x5C);
5197   emit_operand(dst, src);
5198 }
5199 
5200 void Assembler::testb(Register dst, int imm8) {
5201   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
5202   (void) prefix_and_encode(dst->encoding(), true);
5203   emit_arith_b(0xF6, 0xC0, dst, imm8);
5204 }
5205 
5206 void Assembler::testb(Address dst, int imm8) {
5207   InstructionMark im(this);
5208   prefix(dst);
5209   emit_int8((unsigned char)0xF6);
5210   emit_operand(rax, dst, 1);
5211   emit_int8(imm8);
5212 }
5213 
5214 void Assembler::testl(Register dst, int32_t imm32) {
5215   // not using emit_arith because test
5216   // doesn't support sign-extension of
5217   // 8bit operands
5218   int encode = dst->encoding();
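  // an immediate test against rax/eax has a dedicated short opcode (0xA9)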
5219   if (encode == 0) {
5220     emit_int8((unsigned char)0xA9);
5221   } else {
5222     encode = prefix_and_encode(encode);
5223     emit_int16((unsigned char)0xF7, (0xC0 | encode));
5224   }
5225   emit_int32(imm32);
5226 }
5227 
5228 void Assembler::testl(Register dst, Register src) {
5229   (void) prefix_and_encode(dst->encoding(), src->encoding());
5230   emit_arith(0x85, 0xC0, dst, src);
5231 }
5232 
5233 void Assembler::testl(Register dst, Address src) {
5234   InstructionMark im(this);
5235   prefix(src, dst);
5236   emit_int8((unsigned char)0x85);
5237   emit_operand(dst, src);
5238 }
5239 
5240 void Assembler::tzcntl(Register dst, Register src) {
5241   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
5242   emit_int8((unsigned char)0xF3);
5243   int encode = prefix_and_encode(dst->encoding(), src->encoding());
5244   emit_int24(0x0F, (unsigned char)0xBC, (0xC0 | encode));
5247 }
5248 
5249 void Assembler::tzcntq(Register dst, Register src) {
5250   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
5251   emit_int8((unsigned char)0xF3);
5252   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5253   emit_int24(0x0F, (unsigned char)0xBC, (0xC0 | encode));
5254 }
5255 
5256 void Assembler::ucomisd(XMMRegister dst, Address src) {
5257   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5258   InstructionMark im(this);
5259   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5260   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5261   attributes.set_rex_vex_w_reverted();
5262   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5263   emit_int8(0x2E);
5264   emit_operand(dst, src);
5265 }
5266 
5267 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
5268   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5269   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5270   attributes.set_rex_vex_w_reverted();
5271   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5272   emit_int16(0x2E, (0xC0 | encode));
5273 }
5274 
5275 void Assembler::ucomiss(XMMRegister dst, Address src) {
5276   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5277   InstructionMark im(this);
5278   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5279   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5280   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5281   emit_int8(0x2E);
5282   emit_operand(dst, src);
5283 }
5284 
5285 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
5286   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5287   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5288   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5289   emit_int16(0x2E, (0xC0 | encode));
5290 }
5291 
5292 void Assembler::xabort(int8_t imm8) {
5293   emit_int24((unsigned char)0xC6, (unsigned char)0xF8, (imm8 & 0xFF));
5294 }
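
// e.g. xabort(0x42) aborts the enclosing transaction; the immediate is
// reported back in bits 31:24 of eax at the abort handler.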
5295 
5296 void Assembler::xaddb(Address dst, Register src) {
5297   InstructionMark im(this);
5298   prefix(dst, src, true);
5299   emit_int16(0x0F, (unsigned char)0xC0);
5300   emit_operand(src, dst);
5301 }
5302 
5303 void Assembler::xaddw(Address dst, Register src) {
5304   InstructionMark im(this);
5305   emit_int8(0x66);
5306   prefix(dst, src);
5307   emit_int16(0x0F, (unsigned char)0xC1);
5308   emit_operand(src, dst);
5309 }
5310 
5311 void Assembler::xaddl(Address dst, Register src) {
5312   InstructionMark im(this);
5313   prefix(dst, src);
5314   emit_int16(0x0F, (unsigned char)0xC1);
5315   emit_operand(src, dst);
5316 }
5317 
5318 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
5319   InstructionMark im(this);
5320   relocate(rtype);
5321   if (abort.is_bound()) {
5322     address entry = target(abort);
5323     assert(entry != NULL, "abort entry NULL");
5324     intptr_t offset = entry - pc();
5325     emit_int16((unsigned char)0xC7, (unsigned char)0xF8);
5326     emit_int32(offset - 6); // 2 opcode bytes + 4 relative-offset bytes
5327   } else {
5328     abort.add_patch_at(code(), locator());
5329     emit_int16((unsigned char)0xC7, (unsigned char)0xF8);
5330     emit_int32(0);
5331   }
5332 }
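
// A hedged usage sketch (label name illustrative): on abort the CPU resumes
// at the abort target with a status code in eax, so callers follow
//   Label abort;
//   xbegin(abort);       // C7 F8 rel32; falls through on success
//   ... transactional body ...
//   xend();              // commit
//   bind(abort);         // on abort, eax holds the status bits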
5333 
5334 void Assembler::xchgb(Register dst, Address src) { // xchg
5335   InstructionMark im(this);
5336   prefix(src, dst, true);
5337   emit_int8((unsigned char)0x86);
5338   emit_operand(dst, src);
5339 }
5340 
5341 void Assembler::xchgw(Register dst, Address src) { // xchg
5342   InstructionMark im(this);
5343   emit_int8(0x66);
5344   prefix(src, dst);
5345   emit_int8((unsigned char)0x87);
5346   emit_operand(dst, src);
5347 }
5348 
5349 void Assembler::xchgl(Register dst, Address src) { // xchg
5350   InstructionMark im(this);
5351   prefix(src, dst);
5352   emit_int8((unsigned char)0x87);
5353   emit_operand(dst, src);
5354 }
5355 
5356 void Assembler::xchgl(Register dst, Register src) {
5357   int encode = prefix_and_encode(dst->encoding(), src->encoding());
5358   emit_int16((unsigned char)0x87, (0xC0 | encode));
5359 }
5360 
5361 void Assembler::xend() {
5362   emit_int24(0x0F, 0x01, (unsigned char)0xD5);
5363 }
5364 
5365 void Assembler::xgetbv() {
5366   emit_int24(0x0F, 0x01, (unsigned char)0xD0);
5367 }
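
// xgetbv reads the extended control register indexed by ecx into edx:eax;
// with ecx = 0 it returns XCR0, which the VM checks to confirm the OS saves
// AVX/AVX-512 state on context switches.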
5368 
5369 void Assembler::xorl(Register dst, int32_t imm32) {
5370   prefix(dst);
5371   emit_arith(0x81, 0xF0, dst, imm32);
5372 }
5373 
5374 void Assembler::xorl(Register dst, Address src) {
5375   InstructionMark im(this);
5376   prefix(src, dst);
5377   emit_int8(0x33);
5378   emit_operand(dst, src);
5379 }
5380 
5381 void Assembler::xorl(Register dst, Register src) {
5382   (void) prefix_and_encode(dst->encoding(), src->encoding());
5383   emit_arith(0x33, 0xC0, dst, src);
5384 }
5385 
5386 void Assembler::xorb(Register dst, Address src) {
5387   InstructionMark im(this);
5388   prefix(src, dst);
5389   emit_int8(0x32);
5390   emit_operand(dst, src);
5391 }
5392 
5393 void Assembler::xorw(Register dst, Register src) {
5394   (void)prefix_and_encode(dst->encoding(), src->encoding());
5395   emit_arith(0x33, 0xC0, dst, src);
5396 }
5397 
5398 // AVX 3-operand scalar floating-point arithmetic instructions
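//
// In these forms nds supplies the pass-through upper lanes: e.g. vaddsd
// yields dst[63:0] = nds[63:0] + src[63:0] with dst[127:64] copied from nds,
// unlike the two-operand SSE forms, which leave dst's upper bits in place.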
5399 
5400 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
5401   assert(VM_Version::supports_avx(), "");
5402   InstructionMark im(this);
5403   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5404   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5405   attributes.set_rex_vex_w_reverted();
5406   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5407   emit_int8(0x58);
5408   emit_operand(dst, src);
5409 }
5410 
5411 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5412   assert(VM_Version::supports_avx(), "");
5413   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5414   attributes.set_rex_vex_w_reverted();
5415   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5416   emit_int16(0x58, (0xC0 | encode));
5417 }
5418 
5419 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
5420   assert(VM_Version::supports_avx(), "");
5421   InstructionMark im(this);
5422   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5423   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5424   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5425   emit_int8(0x58);
5426   emit_operand(dst, src);
5427 }
5428 
5429 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5430   assert(VM_Version::supports_avx(), "");
5431   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5432   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5433   emit_int16(0x58, (0xC0 | encode));
5434 }
5435 
5436 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
5437   assert(VM_Version::supports_avx(), "");
5438   InstructionMark im(this);
5439   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5440   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5441   attributes.set_rex_vex_w_reverted();
5442   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5443   emit_int8(0x5E);
5444   emit_operand(dst, src);
5445 }
5446 
5447 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5448   assert(VM_Version::supports_avx(), "");
5449   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5450   attributes.set_rex_vex_w_reverted();
5451   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5452   emit_int16(0x5E, (0xC0 | encode));
5453 }
5454 
5455 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
5456   assert(VM_Version::supports_avx(), "");
5457   InstructionMark im(this);
5458   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5459   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5460   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5461   emit_int8(0x5E);
5462   emit_operand(dst, src);
5463 }
5464 
5465 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5466   assert(VM_Version::supports_avx(), "");
5467   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5468   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5469   emit_int16(0x5E, (0xC0 | encode));
5470 }
5471 
5472 void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
5473   assert(VM_Version::supports_fma(), "");
5474   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5475   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5476   emit_int16((unsigned char)0xB9, (0xC0 | encode));
5477 }
5478 
5479 void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
5480   assert(VM_Version::supports_fma(), "");
5481   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5482   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5483   emit_int16((unsigned char)0xB9, (0xC0 | encode));
5484 }
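
// vfmadd231sd/ss compute dst = src1 * src2 + dst with a single rounding,
// so a dot-product accumulation is one instruction per element, e.g.
//   vfmadd231sd(acc, a, b);   // acc += a * b (register names illustrative)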
5485 
5486 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
5487   assert(VM_Version::supports_avx(), "");
5488   InstructionMark im(this);
5489   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5490   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5491   attributes.set_rex_vex_w_reverted();
5492   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5493   emit_int8(0x59);
5494   emit_operand(dst, src);
5495 }
5496 
5497 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5498   assert(VM_Version::supports_avx(), "");
5499   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5500   attributes.set_rex_vex_w_reverted();
5501   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5502   emit_int16(0x59, (0xC0 | encode));
5503 }
5504 
5505 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
5506   assert(VM_Version::supports_avx(), "");
5507   InstructionMark im(this);
5508   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5509   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5510   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5511   emit_int8(0x59);
5512   emit_operand(dst, src);
5513 }
5514 
5515 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5516   assert(VM_Version::supports_avx(), "");
5517   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5518   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5519   emit_int16(0x59, (0xC0 | encode));
5520 }
5521 
5522 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
5523   assert(VM_Version::supports_avx(), "");
5524   InstructionMark im(this);
5525   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5526   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5527   attributes.set_rex_vex_w_reverted();
5528   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5529   emit_int8(0x5C);
5530   emit_operand(dst, src);
5531 }
5532 
5533 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5534   assert(VM_Version::supports_avx(), "");
5535   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5536   attributes.set_rex_vex_w_reverted();
5537   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5538   emit_int16(0x5C, (0xC0 | encode));
5539 }
5540 
5541 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
5542   assert(VM_Version::supports_avx(), "");
5543   InstructionMark im(this);
5544   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5545   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5546   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5547   emit_int8(0x5C);
5548   emit_operand(dst, src);
5549 }
5550 
5551 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5552   assert(VM_Version::supports_avx(), "");
5553   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5554   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5555   emit_int16(0x5C, (0xC0 | encode));
5556 }
5557 
5558 //====================VECTOR ARITHMETIC=====================================
5559 
5560 // Floating-point vector arithmetic
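//
// vector_len selects the operand width (AVX_128bit, AVX_256bit, AVX_512bit);
// e.g. vaddpd(xmm0, xmm1, xmm2, AVX_512bit) adds eight doubles and can only
// be encoded with EVEX.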
5561 
5562 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
5563   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5564   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5565   attributes.set_rex_vex_w_reverted();
5566   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5567   emit_int16(0x58, (0xC0 | encode));
5568 }
5569 
5570 void Assembler::addpd(XMMRegister dst, Address src) {
5571   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5572   InstructionMark im(this);
5573   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5574   attributes.set_rex_vex_w_reverted();
5575   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5576   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5577   emit_int8(0x58);
5578   emit_operand(dst, src);
5579 }
5580 
5581 
5582 void Assembler::addps(XMMRegister dst, XMMRegister src) {
5583   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5584   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5585   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5586   emit_int16(0x58, (0xC0 | encode));
5587 }
5588 
5589 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5590   assert(VM_Version::supports_avx(), "");
5591   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5592   attributes.set_rex_vex_w_reverted();
5593   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5594   emit_int16(0x58, (0xC0 | encode));
5595 }
5596 
5597 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5598   assert(VM_Version::supports_avx(), "");
5599   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5600   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5601   emit_int16(0x58, (0xC0 | encode));
5602 }
5603 
5604 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5605   assert(VM_Version::supports_avx(), "");
5606   InstructionMark im(this);
5607   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5608   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5609   attributes.set_rex_vex_w_reverted();
5610   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5611   emit_int8(0x58);
5612   emit_operand(dst, src);
5613 }
5614 
5615 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5616   assert(VM_Version::supports_avx(), "");
5617   InstructionMark im(this);
5618   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5619   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5620   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5621   emit_int8(0x58);
5622   emit_operand(dst, src);
5623 }
5624 
5625 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
5626   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5627   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5628   attributes.set_rex_vex_w_reverted();
5629   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5630   emit_int16(0x5C, (0xC0 | encode));
5631 }
5632 
5633 void Assembler::subps(XMMRegister dst, XMMRegister src) {
5634   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5635   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5636   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5637   emit_int16(0x5C, (0xC0 | encode));
5638 }
5639 
5640 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5641   assert(VM_Version::supports_avx(), "");
5642   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5643   attributes.set_rex_vex_w_reverted();
5644   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5645   emit_int16(0x5C, (0xC0 | encode));
5646 }
5647 
5648 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5649   assert(VM_Version::supports_avx(), "");
5650   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5651   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5652   emit_int16(0x5C, (0xC0 | encode));
5653 }
5654 
5655 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5656   assert(VM_Version::supports_avx(), "");
5657   InstructionMark im(this);
5658   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5659   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5660   attributes.set_rex_vex_w_reverted();
5661   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5662   emit_int8(0x5C);
5663   emit_operand(dst, src);
5664 }
5665 
5666 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5667   assert(VM_Version::supports_avx(), "");
5668   InstructionMark im(this);
5669   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5670   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5671   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5672   emit_int8(0x5C);
5673   emit_operand(dst, src);
5674 }
5675 
5676 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
5677   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5678   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5679   attributes.set_rex_vex_w_reverted();
5680   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5681   emit_int16(0x59, (0xC0 | encode));
5682 }
5683 
5684 void Assembler::mulpd(XMMRegister dst, Address src) {
5685   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5686   InstructionMark im(this);
5687   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5688   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5689   attributes.set_rex_vex_w_reverted();
5690   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5691   emit_int8(0x59);
5692   emit_operand(dst, src);
5693 }
5694 
5695 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
5696   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5697   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5698   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5699   emit_int16(0x59, (0xC0 | encode));
5700 }
5701 
5702 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5703   assert(VM_Version::supports_avx(), "");
5704   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5705   attributes.set_rex_vex_w_reverted();
5706   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5707   emit_int16(0x59, (0xC0 | encode));
5708 }
5709 
5710 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5711   assert(VM_Version::supports_avx(), "");
5712   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5713   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5714   emit_int16(0x59, (0xC0 | encode));
5715 }
5716 
5717 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5718   assert(VM_Version::supports_avx(), "");
5719   InstructionMark im(this);
5720   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5721   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5722   attributes.set_rex_vex_w_reverted();
5723   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5724   emit_int8(0x59);
5725   emit_operand(dst, src);
5726 }
5727 
5728 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5729   assert(VM_Version::supports_avx(), "");
5730   InstructionMark im(this);
5731   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5732   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5733   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5734   emit_int8(0x59);
5735   emit_operand(dst, src);
5736 }
5737 
5738 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5739   assert(VM_Version::supports_fma(), "");
5740   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5741   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5742   emit_int16((unsigned char)0xB8, (0xC0 | encode));
5743 }
5744 
5745 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5746   assert(VM_Version::supports_fma(), "");
5747   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5748   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5749   emit_int16((unsigned char)0xB8, (0xC0 | encode));
5750 }
5751 
5752 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5753   assert(VM_Version::supports_fma(), "");
5754   InstructionMark im(this);
5755   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5756   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5757   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5758   emit_int8((unsigned char)0xB8);
5759   emit_operand(dst, src2);
5760 }
5761 
5762 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5763   assert(VM_Version::supports_fma(), "");
5764   InstructionMark im(this);
5765   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5766   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5767   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5768   emit_int8((unsigned char)0xB8);
5769   emit_operand(dst, src2);
5770 }
5771 
5772 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
5773   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5774   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5775   attributes.set_rex_vex_w_reverted();
5776   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5777   emit_int16(0x5E, (0xC0 | encode));
5778 }
5779 
5780 void Assembler::divps(XMMRegister dst, XMMRegister src) {
5781   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5782   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5783   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5784   emit_int16(0x5E, (0xC0 | encode));
5785 }
5786 
5787 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5788   assert(VM_Version::supports_avx(), "");
5789   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5790   attributes.set_rex_vex_w_reverted();
5791   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5792   emit_int16(0x5E, (0xC0 | encode));
5793 }
5794 
5795 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5796   assert(VM_Version::supports_avx(), "");
5797   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5798   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5799   emit_int16(0x5E, (0xC0 | encode));
5800 }
5801 
5802 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5803   assert(VM_Version::supports_avx(), "");
5804   InstructionMark im(this);
5805   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5806   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5807   attributes.set_rex_vex_w_reverted();
5808   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5809   emit_int8(0x5E);
5810   emit_operand(dst, src);
5811 }
5812 
5813 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5814   assert(VM_Version::supports_avx(), "");
5815   InstructionMark im(this);
5816   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5817   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5818   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5819   emit_int8(0x5E);
5820   emit_operand(dst, src);
5821 }
5822 
5823 void Assembler::vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len) {
5824   assert(VM_Version::supports_avx(), "");
5825   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5826   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5827   emit_int24(0x09, (0xC0 | encode), (rmode));
5828 }
5829 
5830 void Assembler::vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len) {
5831   assert(VM_Version::supports_avx(), "");
5832   InstructionMark im(this);
5833   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5834   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5835   emit_int8(0x09);
5836   emit_operand(dst, src);
5837   emit_int8((rmode));
5838 }
5839 
5840 void Assembler::vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len) {
5841   assert(VM_Version::supports_evex(), "requires EVEX support");
5842   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5843   attributes.set_is_evex_instruction();
5844   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5845   emit_int24(0x09, (0xC0 | encode), (rmode));
5846 }
5847 
5848 void Assembler::vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len) {
5849   assert(VM_Version::supports_evex(), "requires EVEX support");
5850   assert(dst != xnoreg, "sanity");
5851   InstructionMark im(this);
5852   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5853   attributes.set_is_evex_instruction();
5854   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5855   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5856   emit_int8(0x09);
5857   emit_operand(dst, src);
5858   emit_int8((rmode));
5859 }
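
// vrndscalepd is the EVEX counterpart of vroundpd: immediate bits 1:0 choose
// the rounding mode as above, while bits 7:4 add a fixed-point scale M
// (round to a multiple of 2^-M); hence the EVEX-only assert.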
5860 
5861 
5862 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
5863   assert(VM_Version::supports_avx(), "");
5864   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5865   attributes.set_rex_vex_w_reverted();
5866   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5867   emit_int16(0x51, (0xC0 | encode));
5868 }
5869 
5870 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
5871   assert(VM_Version::supports_avx(), "");
5872   InstructionMark im(this);
5873   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5874   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5875   attributes.set_rex_vex_w_reverted();
5876   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5877   emit_int8(0x51);
5878   emit_operand(dst, src);
5879 }
5880 
5881 void Assembler::vsqrtps(XMMRegister dst, XMMRegister src, int vector_len) {
5882   assert(VM_Version::supports_avx(), "");
5883   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5884   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5885   emit_int16(0x51, (0xC0 | encode));
5886 }
5887 
5888 void Assembler::vsqrtps(XMMRegister dst, Address src, int vector_len) {
5889   assert(VM_Version::supports_avx(), "");
5890   InstructionMark im(this);
5891   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5892   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5893   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5894   emit_int8(0x51);
5895   emit_operand(dst, src);
5896 }
5897 
5898 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
5899   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5900   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
5901   attributes.set_rex_vex_w_reverted();
5902   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5903   emit_int16(0x54, (0xC0 | encode));
5904 }
5905 
void Assembler::andps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int16(0x54, (0xC0 | encode));
}

void Assembler::andps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}

void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}

void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x54, (0xC0 | encode));
}

void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int16(0x54, (0xC0 | encode));
}

void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}

void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}

void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x15, (0xC0 | encode));
}

void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x14, (0xC0 | encode));
}

void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x57, (0xC0 | encode));
}

void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int16(0x57, (0xC0 | encode));
}

void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}

void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}

void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x57, (0xC0 | encode));
}

void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int16(0x57, (0xC0 | encode));
}

void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}

void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}

// Integer vector arithmetic
void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == 0)) ||
         VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x01, (0xC0 | encode));
}

void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == 0)) ||
         VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x02, (0xC0 | encode));
}

void Assembler::paddb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xFC, (0xC0 | encode));
}

void Assembler::paddw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xFD, (0xC0 | encode));
}

void Assembler::paddd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xFE, (0xC0 | encode));
}

void Assembler::paddd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_operand(dst, src);
}

void Assembler::paddq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xD4, (0xC0 | encode));
}

void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "phaddw is an SSSE3 instruction");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x01, (0xC0 | encode));
}

void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "phaddd is an SSSE3 instruction");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x02, (0xC0 | encode));
}

void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xFC, (0xC0 | encode));
}

void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xFD, (0xC0 | encode));
}

void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xFE, (0xC0 | encode));
}

void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xD4, (0xC0 | encode));
}

void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFC);
  emit_operand(dst, src);
}

void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFD);
  emit_operand(dst, src);
}

void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_operand(dst, src);
}

void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD4);
  emit_operand(dst, src);
}

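// Tuple types in the memory forms above: byte/word element ops use EVEX_FVM (full
// vector memory, no embedded broadcast), while dword/qword ops use EVEX_FV with the
// element width as input size, which also fixes the disp8*N scaling factor.
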
void Assembler::psubb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF8, (0xC0 | encode));
}

void Assembler::psubw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF9, (0xC0 | encode));
}

void Assembler::psubd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xFA, (0xC0 | encode));
}

void Assembler::psubq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xFB, (0xC0 | encode));
}

void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF8, (0xC0 | encode));
}

void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF9, (0xC0 | encode));
}

void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xFA, (0xC0 | encode));
}

void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xFB, (0xC0 | encode));
}

void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF8);
  emit_operand(dst, src);
}

void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF9);
  emit_operand(dst, src);
}

void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFA);
  emit_operand(dst, src);
}

void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFB);
  emit_operand(dst, src);
}

void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xD5, (0xC0 | encode));
}

void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x40, (0xC0 | encode));
}

void Assembler::pmuludq(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF4, (0xC0 | encode));
}

void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xD5, (0xC0 | encode));
}

void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x40, (0xC0 | encode));
}

void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x40, (0xC0 | encode));
}

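// vpmullq has no VEX form: the 64-bit element multiply (EVEX.66.0F38.W1 40 /r) is
// an AVX-512DQ instruction, hence set_is_evex_instruction() and the UseAVX > 2 guard.
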
void Assembler::vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF4, (0xC0 | encode));
}

void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD5);
  emit_operand(dst, src);
}

void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_operand(dst, src);
}

void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_is_evex_instruction();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_operand(dst, src);
}

// Min, max
void Assembler::pminsb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x38, (0xC0 | encode));
}

void Assembler::vpminsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
        (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x38, (0xC0 | encode));
}

void Assembler::pminsw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xEA, (0xC0 | encode));
}

void Assembler::vpminsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
        (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xEA, (0xC0 | encode));
}

void Assembler::pminsd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x39, (0xC0 | encode));
}

void Assembler::vpminsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
        (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x39, (0xC0 | encode));
}

void Assembler::vpminsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires AVX512F");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x39, (0xC0 | encode));
}

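// Signed 64-bit min/max (vpminsq above, opcode 0x39; vpmaxsq further down, 0x3D)
// exist only as EVEX instructions (AVX-512F), so these emitters force an EVEX
// prefix via set_is_evex_instruction() and have no VEX fallback.
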
void Assembler::minps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int16(0x5D, (0xC0 | encode));
}

void Assembler::vminps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int16(0x5D, (0xC0 | encode));
}

void Assembler::minpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), "minpd is an SSE2 instruction"));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x5D, (0xC0 | encode));
}

void Assembler::vminpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x5D, (0xC0 | encode));
}

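// Caution on SSE/AVX packed min/max semantics: when either input is NaN, or the
// inputs are -0.0 and +0.0, the instruction returns the *second* source operand,
// so min/max are not commutative here; callers needing Java semantics must order
// operands and handle NaN themselves.
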
void Assembler::pmaxsb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x3C, (0xC0 | encode));
}

void Assembler::vpmaxsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
        (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x3C, (0xC0 | encode));
}

void Assembler::pmaxsw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xEE, (0xC0 | encode));
}

void Assembler::vpmaxsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
        (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xEE, (0xC0 | encode));
}

void Assembler::pmaxsd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x3D, (0xC0 | encode));
}

void Assembler::vpmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
        (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x3D, (0xC0 | encode));
}

void Assembler::vpmaxsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires AVX512F");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x3D, (0xC0 | encode));
}

void Assembler::maxps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int16(0x5F, (0xC0 | encode));
}

void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int16(0x5F, (0xC0 | encode));
}

void Assembler::maxpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), "maxpd is an SSE2 instruction"));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x5F, (0xC0 | encode));
}

void Assembler::vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x5F, (0xC0 | encode));
}

// Shift packed integers left by specified number of bits.
void Assembler::psllw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
}

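// The immediate shift forms hide their opcode extension in ModRM.reg: the xmm6
// above stands for /6, while dst is passed as both nds and rm. As a worked example,
// psllw(xmm1, 3) assembles to 66 0F 71 F1 03 (ModRM 0xF1 = mod 11, reg /6, rm xmm1).
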
void Assembler::pslld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
}

void Assembler::psllq(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
}

void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF1, (0xC0 | encode));
}

void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF2, (0xC0 | encode));
}

void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF3, (0xC0 | encode));
}

void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
}

void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
}

void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
}

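// In the VEX immediate forms above, the operands to vex_prefix_and_encode() are
// (/N pseudo-register, dst, src): the destination rides in VEX.vvvv (the
// non-destructive operand) and the source register lands in ModRM.rm.
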
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF1, (0xC0 | encode));
}

void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF2, (0xC0 | encode));
}

void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xF3, (0xC0 | encode));
}

// Shift packed integers logically right by specified number of bits.
void Assembler::psrlw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
}

void Assembler::psrld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
}

void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse it with psrldq SSE2 instruction which
  // shifts 128 bit value in xmm register by number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
}

void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xD1, (0xC0 | encode));
}

void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xD2, (0xC0 | encode));
}

void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xD3, (0xC0 | encode));
}

void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
}

void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
}

void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
}

void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xD1, (0xC0 | encode));
}

void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xD2, (0xC0 | encode));
}

void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xD3, (0xC0 | encode));
}

void Assembler::evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x10, (0xC0 | encode));
}

void Assembler::evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x12, (0xC0 | encode));
}

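// The variable word shifts above (vpsrlvw: EVEX.66.0F38.W1 10, vpsllvw:
// EVEX.66.0F38.W1 12) were added by AVX-512BW; unlike the dword/qword variable
// shifts later in this file, they have no VEX (AVX2) encoding.
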
// Shift packed integers arithmetically right by specified number of bits.
void Assembler::psraw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
}

void Assembler::psrad(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
}

void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xE1, (0xC0 | encode));
}

void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xE2, (0xC0 | encode));
}

void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
}

void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
}

void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xE1, (0xC0 | encode));
}

void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xE2, (0xC0 | encode));
}

void Assembler::evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 2, "requires AVX512");
  assert(VM_Version::supports_avx512vl() || vector_len == AVX_512bit, "requires AVX512VL");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
}

void Assembler::evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 2, "requires AVX512");
  assert(VM_Version::supports_avx512vl() || vector_len == AVX_512bit, "requires AVX512VL");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xE2, (0xC0 | encode));
}

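// vpsraq likewise has no VEX form: the 64-bit arithmetic right shift arrived with
// AVX-512F (EVEX.66.0F.W1 72 /4 ib and EVEX.66.0F.W1 E2 /r), so both emitters above
// force an EVEX prefix and, below 512-bit vectors, require AVX512VL.
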
6828 // logical operations packed integers
6829 void Assembler::pand(XMMRegister dst, XMMRegister src) {
6830   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6831   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6832   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6833   emit_int16((unsigned char)0xDB, (0xC0 | encode));
6834 }
6835 
6836 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6837   assert(UseAVX > 0, "requires some form of AVX");
6838   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6839   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6840   emit_int16((unsigned char)0xDB, (0xC0 | encode));
6841 }
6842 
6843 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6844   assert(UseAVX > 0, "requires some form of AVX");
6845   InstructionMark im(this);
6846   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6847   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6848   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6849   emit_int8((unsigned char)0xDB);
6850   emit_operand(dst, src);
6851 }
6852 
6853 void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6854   assert(VM_Version::supports_evex(), "");
6855   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6856   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6857   emit_int16((unsigned char)0xDB, (0xC0 | encode));
6858 }
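
// Note (illustrative): vpand and vpandq emit the same 0xDB opcode and differ only
// in EVEX element width (W0 dword vs. W1 qword), which matters for opmasking and
// embedded broadcast, not for the plain bitwise result. A hypothetical
//   __ vpandq(xmm1, xmm2, xmm3, Assembler::AVX_512bit);
// simply ANDs all 512 bits of zmm2 and zmm3 into zmm1.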
6859 
6860 // Variable logical left shift of packed integers.
6861 void Assembler::vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6862   assert(UseAVX > 1, "requires AVX2");
6863   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6864   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6865   emit_int16(0x47, (0xC0 | encode));
6866 }
6867 
6868 void Assembler::vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6869   assert(UseAVX > 1, "requires AVX2");
6870   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6871   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6872   emit_int16(0x47, (0xC0 | encode));
6873 }
6874 
6875 // Variable logical right shift of packed integers.
6876 void Assembler::vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6877   assert(UseAVX > 1, "requires AVX2");
6878   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6879   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6880   emit_int16(0x45, (0xC0 | encode));
6881 }
6882 
6883 void Assembler::vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6884   assert(UseAVX > 1, "requires AVX2");
6885   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6886   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6887   emit_int16(0x45, (0xC0 | encode));
6888 }
6889 
6890 // Variable arithmetic right shift of packed integers.
6891 void Assembler::vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6892   assert(UseAVX > 1, "requires AVX2");
6893   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6894   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6895   emit_int16(0x46, (0xC0 | encode));
6896 }
6897 
6898 void Assembler::evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6899   assert(VM_Version::supports_avx512bw(), "");
6900   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6901   attributes.set_is_evex_instruction();
6902   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6903   emit_int16(0x11, (0xC0 | encode));
6904 }
6905 
6906 void Assembler::evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6907   assert(UseAVX > 2, "requires AVX512");
6908   assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
6909   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6910   attributes.set_is_evex_instruction();
6911   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6912   emit_int16(0x46, (0xC0 | encode));
6913 }
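
// Usage sketch for the variable shifts (illustrative): each lane is shifted by the
// count held in the corresponding lane of `shift`. Counts at or above the lane
// width yield 0 for the logical forms and all sign bits for the arithmetic forms:
//   __ vpsravd(xmm0, xmm1, xmm2, Assembler::AVX_256bit); // ymm0[i] = (int32)ymm1[i] >> ymm2[i]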
6914 
6915 void Assembler::vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6916   assert(VM_Version::supports_avx512_vbmi2(), "requires vbmi2");
6917   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6918   attributes.set_is_evex_instruction();
6919   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6920   emit_int16(0x71, (0xC0 | encode));
6921 }
6922 
6923 void Assembler::vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6924   assert(VM_Version::supports_avx512_vbmi2(), "requires vbmi2");
6925   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6926   attributes.set_is_evex_instruction();
6927   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6928   emit_int16(0x73, (0xC0 | encode));
6929 }
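
// Note (illustrative): vpshldvd/vpshrdvd are VBMI2 funnel shifts. Each dst lane is
// shifted with bits pulled in from the matching lane of `src`, using per-lane
// counts taken from `shift`, e.g. a hypothetical
//   __ vpshldvd(xmm0, xmm1, xmm2, Assembler::AVX_512bit);
// funnel-shifts each dword of zmm0 left, filling from zmm1, by zmm2's counts.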
6930 
6931 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
6932   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6933   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6934   attributes.set_rex_vex_w_reverted();
6935   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6936   emit_int16((unsigned char)0xDF, (0xC0 | encode));
6937 }
6938 
6939 void Assembler::vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6940   assert(UseAVX > 0, "requires some form of AVX");
6941   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6942   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6943   emit_int16((unsigned char)0xDF, (0xC0 | encode));
6944 }
6945 
6946 void Assembler::por(XMMRegister dst, XMMRegister src) {
6947   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6948   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6949   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6950   emit_int16((unsigned char)0xEB, (0xC0 | encode));
6951 }
6952 
6953 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6954   assert(UseAVX > 0, "requires some form of AVX");
6955   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6956   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6957   emit_int16((unsigned char)0xEB, (0xC0 | encode));
6958 }
6959 
6960 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6961   assert(UseAVX > 0, "requires some form of AVX");
6962   InstructionMark im(this);
6963   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6964   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6965   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6966   emit_int8((unsigned char)0xEB);
6967   emit_operand(dst, src);
6968 }
6969 
6970 void Assembler::vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6971   assert(VM_Version::supports_evex(), "");
6972   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6973   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6974   emit_int16((unsigned char)0xEB, (0xC0 | encode));
6975 }
6976 
6977 
6978 void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
6979   assert(VM_Version::supports_evex(), "");
6980   // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
6981   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6982   attributes.set_is_evex_instruction();
6983   attributes.set_embedded_opmask_register_specifier(mask);
6984   if (merge) {
6985     attributes.reset_is_clear_context();
6986   }
6987   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6988   emit_int16((unsigned char)0xEB, (0xC0 | encode));
6989 }
6990 
6991 void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
6992   assert(VM_Version::supports_evex(), "");
6993   // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
6994   InstructionMark im(this);
6995   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6996   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
6997   attributes.set_is_evex_instruction();
6998   attributes.set_embedded_opmask_register_specifier(mask);
6999   if (merge) {
7000     attributes.reset_is_clear_context();
7001   }
7002   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7003   emit_int8((unsigned char)0xEB);
7004   emit_operand(dst, src);
7005 }
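
// Masking sketch for the ev* forms (illustrative): with merge == true, lanes whose
// opmask bit is clear keep their previous dst contents (EVEX merge-masking); with
// merge == false they are zeroed (EVEX zero-masking, the {z} form). A hypothetical
//   __ evpord(xmm0, k1, xmm1, xmm2, /*merge*/ false, Assembler::AVX_512bit);
// writes zmm1 | zmm2 into the k1-selected lanes of zmm0 and zeroes the rest.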
7006 
7007 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
7008   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7009   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7010   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7011   emit_int16((unsigned char)0xEF, (0xC0 | encode));
7012 }
7013 
7014 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
7015   assert(UseAVX > 0, "requires some form of AVX");
7016   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7017   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7018   emit_int16((unsigned char)0xEF, (0xC0 | encode));
7019 }
7020 
7021 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
7022   assert(UseAVX > 0, "requires some form of AVX");
7023   InstructionMark im(this);
7024   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7025   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
7026   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7027   emit_int8((unsigned char)0xEF);
7028   emit_operand(dst, src);
7029 }
7030 
7031 void Assembler::vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
7032   assert(UseAVX > 2, "requires some form of EVEX");
7033   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7034   attributes.set_rex_vex_w_reverted();
7035   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7036   emit_int16((unsigned char)0xEF, (0xC0 | encode));
7037 }
7038 
7039 void Assembler::evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
7040   assert(VM_Version::supports_evex(), "");
7041   // Encoding: EVEX.NDS.XXX.66.0F.W0 EF /r
7042   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7043   attributes.set_is_evex_instruction();
7044   attributes.set_embedded_opmask_register_specifier(mask);
7045   if (merge) {
7046     attributes.reset_is_clear_context();
7047   }
7048   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7049   emit_int16((unsigned char)0xEF, (0xC0 | encode));
7050 }
7051 
7052 void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
7053   assert(VM_Version::supports_evex(), "requires EVEX support");
7054   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7055   attributes.set_is_evex_instruction();
7056   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7057   emit_int16((unsigned char)0xEF, (0xC0 | encode));
7058 }
7059 
7060 void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
7061   assert(VM_Version::supports_evex(), "requires EVEX support");
7062   assert(dst != xnoreg, "sanity");
7063   InstructionMark im(this);
7064   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7065   attributes.set_is_evex_instruction();
7066   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
7067   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7068   emit_int8((unsigned char)0xEF);
7069   emit_operand(dst, src);
7070 }
7071 
7072 void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len) {
7073   assert(VM_Version::supports_evex(), "requires EVEX support");
7074   assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
7075   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7076   attributes.set_is_evex_instruction();
7077   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7078   emit_int24(0x25, (0xC0 | encode), imm8);
7081 }
7082 
7083 void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len) {
7084   assert(VM_Version::supports_evex(), "requires EVEX support");
7085   assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
7086   assert(dst != xnoreg, "sanity");
7087   InstructionMark im(this);
7088   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7089   attributes.set_is_evex_instruction();
7090   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
7091   vex_prefix(src3, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7092   emit_int8(0x25);
7093   emit_operand(dst, src3);
7094   emit_int8(imm8);
7095 }
7096 
7097 void Assembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len) {
7098   assert(VM_Version::supports_evex(), "requires EVEX support");
7099   assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
7100   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7101   attributes.set_is_evex_instruction();
7102   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7103   emit_int24(0x25, (0xC0 | encode), imm8);
7106 }
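
// The vpternlog immediate is a 3-input truth table (illustrative): bit n of imm8
// gives the result bit for the input combination where (src1, src2, src3) spell n
// in binary, so 0x96 is three-way XOR and 0xE8 is majority. A hypothetical
//   __ vpternlogd(xmm0, 0x96, xmm1, xmm2, Assembler::AVX_512bit);
// leaves zmm0 = zmm0 ^ zmm1 ^ zmm2 in a single instruction.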
7107 
7108 // vinserti forms
7109 
7110 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7111   assert(VM_Version::supports_avx2(), "");
7112   assert(imm8 <= 0x01, "imm8: %u", imm8);
7113   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7114   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7115   // imm8:
7116   // 0x00 - insert into lower 128 bits
7117   // 0x01 - insert into upper 128 bits
7118   emit_int24(0x38, (0xC0 | encode), imm8 & 0x01);
7119 }
7120 
7121 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7122   assert(VM_Version::supports_avx2(), "");
7123   assert(dst != xnoreg, "sanity");
7124   assert(imm8 <= 0x01, "imm8: %u", imm8);
7125   InstructionMark im(this);
7126   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7127   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7128   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7129   emit_int8(0x38);
7130   emit_operand(dst, src);
7131   // 0x00 - insert into lower 128 bits
7132   // 0x01 - insert into upper 128 bits
7133   emit_int8(imm8 & 0x01);
7134 }
7135 
7136 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7137   assert(VM_Version::supports_evex(), "");
7138   assert(imm8 <= 0x03, "imm8: %u", imm8);
7139   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7140   attributes.set_is_evex_instruction();
7141   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7142   // imm8:
7143   // 0x00 - insert into q0 128 bits (0..127)
7144   // 0x01 - insert into q1 128 bits (128..255)
7145   // 0x02 - insert into q2 128 bits (256..383)
7146   // 0x03 - insert into q3 128 bits (384..511)
7147   emit_int24(0x38, (0xC0 | encode), imm8 & 0x03);
7148 }
7149 
7150 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7151   assert(VM_Version::supports_evex(), "");
7152   assert(dst != xnoreg, "sanity");
7153   assert(imm8 <= 0x03, "imm8: %u", imm8);
7154   InstructionMark im(this);
7155   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7156   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7157   attributes.set_is_evex_instruction();
7158   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7159   emit_int8(0x38);
7160   emit_operand(dst, src);
7161   // 0x00 - insert into q0 128 bits (0..127)
7162   // 0x01 - insert into q1 128 bits (128..255)
7163   // 0x02 - insert into q2 128 bits (256..383)
7164   // 0x03 - insert into q3 128 bits (384..511)
7165   emit_int8(imm8 & 0x03);
7166 }
7167 
7168 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7169   assert(VM_Version::supports_evex(), "");
7170   assert(imm8 <= 0x01, "imm8: %u", imm8);
7171   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7172   attributes.set_is_evex_instruction();
7173   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7174   //imm8:
7175   // 0x00 - insert into lower 256 bits
7176   // 0x01 - insert into upper 256 bits
7177   emit_int24(0x3A, (0xC0 | encode), imm8 & 0x01);
7178 }
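
// Assembly sketch (illustrative): the insert forms copy nds into dst and then
// overwrite the selected 128/256-bit slice, so a 512-bit vector can be assembled
// from two 256-bit halves:
//   __ vinserti64x4(xmm0, xmm0, xmm1, 1); // zmm0[511:256] = ymm1, low half kept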
7179 
7180 
7181 // vinsertf forms
7182 
7183 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7184   assert(VM_Version::supports_avx(), "");
7185   assert(imm8 <= 0x01, "imm8: %u", imm8);
7186   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7187   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7188   // imm8:
7189   // 0x00 - insert into lower 128 bits
7190   // 0x01 - insert into upper 128 bits
7191   emit_int24(0x18, (0xC0 | encode), imm8 & 0x01);
7192 }
7193 
7194 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7195   assert(VM_Version::supports_avx(), "");
7196   assert(dst != xnoreg, "sanity");
7197   assert(imm8 <= 0x01, "imm8: %u", imm8);
7198   InstructionMark im(this);
7199   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7200   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7201   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7202   emit_int8(0x18);
7203   emit_operand(dst, src);
7204   // 0x00 - insert into lower 128 bits
7205   // 0x01 - insert into upper 128 bits
7206   emit_int8(imm8 & 0x01);
7207 }
7208 
7209 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7210   assert(VM_Version::supports_evex(), "");
7211   assert(imm8 <= 0x03, "imm8: %u", imm8);
7212   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7213   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7214   // imm8:
7215   // 0x00 - insert into q0 128 bits (0..127)
7216   // 0x01 - insert into q1 128 bits (128..255)
7217   // 0x02 - insert into q2 128 bits (256..383)
7218   // 0x03 - insert into q3 128 bits (384..511)
7219   emit_int24(0x18, (0xC0 | encode), imm8 & 0x03);
7220 }
7221 
7222 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7223   assert(VM_Version::supports_evex(), "");
7224   assert(dst != xnoreg, "sanity");
7225   assert(imm8 <= 0x03, "imm8: %u", imm8);
7226   InstructionMark im(this);
7227   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7228   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7229   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7230   emit_int8(0x18);
7231   emit_operand(dst, src);
7232   // 0x00 - insert into q0 128 bits (0..127)
7233   // 0x01 - insert into q1 128 bits (128..255)
7234   // 0x02 - insert into q2 128 bits (256..383)
7235   // 0x03 - insert into q3 128 bits (384..511)
7236   emit_int8(imm8 & 0x03);
7237 }
7238 
7239 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7240   assert(VM_Version::supports_evex(), "");
7241   assert(imm8 <= 0x01, "imm8: %u", imm8);
7242   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7243   attributes.set_is_evex_instruction();
7244   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7245   // imm8:
7246   // 0x00 - insert into lower 256 bits
7247   // 0x01 - insert into upper 256 bits
7248   emit_int24(0x1A, (0xC0 | encode), imm8 & 0x01);
7249 }
7250 
7251 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7252   assert(VM_Version::supports_evex(), "");
7253   assert(dst != xnoreg, "sanity");
7254   assert(imm8 <= 0x01, "imm8: %u", imm8);
7255   InstructionMark im(this);
7256   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7257   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
7258   attributes.set_is_evex_instruction();
7259   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7260   emit_int8(0x1A);
7261   emit_operand(dst, src);
7262   // 0x00 - insert into lower 256 bits
7263   // 0x01 - insert into upper 256 bits
7264   emit_int8(imm8 & 0x01);
7265 }
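
// Note (illustrative): the vinsertf forms move exactly the same bits as their
// vinserti twins; the f/i split only tags the instruction as FP- or int-domain,
// which can avoid a bypass delay between domains on some microarchitectures.
//   __ vinsertf64x4(xmm0, xmm0, xmm1, 0); // zmm0[255:0] = ymm1, upper half kept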
7266 
7267 
7268 // vextracti forms
7269 
7270 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7271   assert(VM_Version::supports_avx2(), "");
7272   assert(imm8 <= 0x01, "imm8: %u", imm8);
7273   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7274   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7275   // imm8:
7276   // 0x00 - extract from lower 128 bits
7277   // 0x01 - extract from upper 128 bits
7278   emit_int24(0x39, (0xC0 | encode), imm8 & 0x01);
7279 }
7280 
7281 void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
7282   assert(VM_Version::supports_avx2(), "");
7283   assert(src != xnoreg, "sanity");
7284   assert(imm8 <= 0x01, "imm8: %u", imm8);
7285   InstructionMark im(this);
7286   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7287   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7288   attributes.reset_is_clear_context();
7289   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7290   emit_int8(0x39);
7291   emit_operand(src, dst);
7292   // 0x00 - extract from lower 128 bits
7293   // 0x01 - extract from upper 128 bits
7294   emit_int8(imm8 & 0x01);
7295 }
7296 
7297 void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7298   assert(VM_Version::supports_evex(), "");
7299   assert(imm8 <= 0x03, "imm8: %u", imm8);
7300   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7301   attributes.set_is_evex_instruction();
7302   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7303   // imm8:
7304   // 0x00 - extract from bits 127:0
7305   // 0x01 - extract from bits 255:128
7306   // 0x02 - extract from bits 383:256
7307   // 0x03 - extract from bits 511:384
7308   emit_int24(0x39, (0xC0 | encode), imm8 & 0x03);
7309 }
7310 
7311 void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
7312   assert(VM_Version::supports_evex(), "");
7313   assert(src != xnoreg, "sanity");
7314   assert(imm8 <= 0x03, "imm8: %u", imm8);
7315   InstructionMark im(this);
7316   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7317   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7318   attributes.reset_is_clear_context();
7319   attributes.set_is_evex_instruction();
7320   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7321   emit_int8(0x39);
7322   emit_operand(src, dst);
7323   // 0x00 - extract from bits 127:0
7324   // 0x01 - extract from bits 255:128
7325   // 0x02 - extract from bits 383:256
7326   // 0x03 - extract from bits 511:384
7327   emit_int8(imm8 & 0x03);
7328 }
7329 
7330 void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7331   assert(VM_Version::supports_avx512dq(), "");
7332   assert(imm8 <= 0x03, "imm8: %u", imm8);
7333   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7334   attributes.set_is_evex_instruction();
7335   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7336   // imm8:
7337   // 0x00 - extract from bits 127:0
7338   // 0x01 - extract from bits 255:128
7339   // 0x02 - extract from bits 383:256
7340   // 0x03 - extract from bits 511:384
7341   emit_int24(0x39, (0xC0 | encode), imm8 & 0x03);
7342 }
7343 
7344 void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7345   assert(VM_Version::supports_evex(), "");
7346   assert(imm8 <= 0x01, "imm8: %u", imm8);
7347   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7348   attributes.set_is_evex_instruction();
7349   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7350   // imm8:
7351   // 0x00 - extract from lower 256 bits
7352   // 0x01 - extract from upper 256 bits
7353   emit_int24(0x3B, (0xC0 | encode), imm8 & 0x01);
7354 }
7355 
7356 void Assembler::vextracti64x4(Address dst, XMMRegister src, uint8_t imm8) {
7357   assert(VM_Version::supports_evex(), "");
7358   assert(src != xnoreg, "sanity");
7359   assert(imm8 <= 0x01, "imm8: %u", imm8);
7360   InstructionMark im(this);
7361   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7362   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
7363   attributes.reset_is_clear_context();
7364   attributes.set_is_evex_instruction();
7365   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7366   emit_int8(0x3B);
7367   emit_operand(src, dst);
7368   // 0x00 - extract from lower 256 bits
7369   // 0x01 - extract from upper 256 bits
7370   emit_int8(imm8 & 0x01);
7371 }
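
// Extraction sketch (illustrative): the memory forms store the selected slice
// straight to memory without disturbing the source register, e.g. a hypothetical
//   __ vextracti64x4(Address(rsp, 0), xmm0, 1);
// spills zmm0[511:256] to [rsp].
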
7372 // vextractf forms
7373 
7374 void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7375   assert(VM_Version::supports_avx(), "");
7376   assert(imm8 <= 0x01, "imm8: %u", imm8);
7377   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7378   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7379   // imm8:
7380   // 0x00 - extract from lower 128 bits
7381   // 0x01 - extract from upper 128 bits
7382   emit_int24(0x19, (0xC0 | encode), imm8 & 0x01);
7383 }
7384 
7385 void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
7386   assert(VM_Version::supports_avx(), "");
7387   assert(src != xnoreg, "sanity");
7388   assert(imm8 <= 0x01, "imm8: %u", imm8);
7389   InstructionMark im(this);
7390   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7391   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7392   attributes.reset_is_clear_context();
7393   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7394   emit_int8(0x19);
7395   emit_operand(src, dst);
7396   // 0x00 - extract from lower 128 bits
7397   // 0x01 - extract from upper 128 bits
7398   emit_int8(imm8 & 0x01);
7399 }
7400 
7401 void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7402   assert(VM_Version::supports_evex(), "");
7403   assert(imm8 <= 0x03, "imm8: %u", imm8);
7404   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7405   attributes.set_is_evex_instruction();
7406   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7407   // imm8:
7408   // 0x00 - extract from bits 127:0
7409   // 0x01 - extract from bits 255:128
7410   // 0x02 - extract from bits 383:256
7411   // 0x03 - extract from bits 511:384
7412   emit_int24(0x19, (0xC0 | encode), imm8 & 0x03);
7413 }
7414 
7415 void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
7416   assert(VM_Version::supports_evex(), "");
7417   assert(src != xnoreg, "sanity");
7418   assert(imm8 <= 0x03, "imm8: %u", imm8);
7419   InstructionMark im(this);
7420   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7421   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7422   attributes.reset_is_clear_context();
7423   attributes.set_is_evex_instruction();
7424   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7425   emit_int8(0x19);
7426   emit_operand(src, dst);
7427   // 0x00 - extract from bits 127:0
7428   // 0x01 - extract from bits 255:128
7429   // 0x02 - extract from bits 383:256
7430   // 0x03 - extract from bits 511:384
7431   emit_int8(imm8 & 0x03);
7432 }
7433 
7434 void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7435   assert(VM_Version::supports_avx512dq(), "");
7436   assert(imm8 <= 0x03, "imm8: %u", imm8);
7437   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7438   attributes.set_is_evex_instruction();
7439   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7440   // imm8:
7441   // 0x00 - extract from bits 127:0
7442   // 0x01 - extract from bits 255:128
7443   // 0x02 - extract from bits 383:256
7444   // 0x03 - extract from bits 511:384
7445   emit_int24(0x19, (0xC0 | encode), imm8 & 0x03);
7446 }
7447 
7448 void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7449   assert(VM_Version::supports_evex(), "");
7450   assert(imm8 <= 0x01, "imm8: %u", imm8);
7451   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7452   attributes.set_is_evex_instruction();
7453   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7454   // imm8:
7455   // 0x00 - extract from lower 256 bits
7456   // 0x01 - extract from upper 256 bits
7457   emit_int24(0x1B, (0xC0 | encode), imm8 & 0x01);
7458 }
7459 
7460 void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
7461   assert(VM_Version::supports_evex(), "");
7462   assert(src != xnoreg, "sanity");
7463   assert(imm8 <= 0x01, "imm8: %u", imm8);
7464   InstructionMark im(this);
7465   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7466   attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
7467   attributes.reset_is_clear_context();
7468   attributes.set_is_evex_instruction();
7469   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7470   emit_int8(0x1B);
7471   emit_operand(src, dst);
7472   // 0x00 - extract from lower 256 bits
7473   // 0x01 - extract from upper 256 bits
7474   emit_int8(imm8 & 0x01);
7475 }
7476 
// xmm/mem sourced byte/word/dword/qword replicate

7477 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7478 void Assembler::vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
7479   assert(VM_Version::supports_avx2(), "");
7480   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7481   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7482   emit_int16(0x78, (0xC0 | encode));
7483 }
7484 
7485 void Assembler::vpbroadcastb(XMMRegister dst, Address src, int vector_len) {
7486   assert(VM_Version::supports_avx2(), "");
7487   assert(dst != xnoreg, "sanity");
7488   InstructionMark im(this);
7489   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7490   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
7491   // swap src<->dst for encoding
7492   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7493   emit_int8(0x78);
7494   emit_operand(dst, src);
7495 }
7496 
7497 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7498 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
7499   assert(VM_Version::supports_avx2(), "");
7500   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7501   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7502   emit_int16(0x79, (0xC0 | encode));
7503 }
7504 
7505 void Assembler::vpbroadcastw(XMMRegister dst, Address src, int vector_len) {
7506   assert(VM_Version::supports_avx2(), "");
7507   assert(dst != xnoreg, "sanity");
7508   InstructionMark im(this);
7509   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7510   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
7511   // swap src<->dst for encoding
7512   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7513   emit_int8(0x79);
7514   emit_operand(dst, src);
7515 }
7516 
7519 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
7520 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
7521   assert(UseAVX >= 2, "");
7522   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7523   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7524   emit_int16(0x58, (0xC0 | encode));
7525 }
7526 
7527 void Assembler::vpbroadcastd(XMMRegister dst, Address src, int vector_len) {
7528   assert(VM_Version::supports_avx2(), "");
7529   assert(dst != xnoreg, "sanity");
7530   InstructionMark im(this);
7531   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7532   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7533   // swap src<->dst for encoding
7534   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7535   emit_int8(0x58);
7536   emit_operand(dst, src);
7537 }
7538 
7539 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
7540 void Assembler::vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
7541   assert(VM_Version::supports_avx2(), "");
7542   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7543   attributes.set_rex_vex_w_reverted();
7544   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7545   emit_int16(0x59, (0xC0 | encode));
7546 }
7547 
7548 void Assembler::vpbroadcastq(XMMRegister dst, Address src, int vector_len) {
7549   assert(VM_Version::supports_avx2(), "");
7550   assert(dst != xnoreg, "sanity");
7551   InstructionMark im(this);
7552   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7553   attributes.set_rex_vex_w_reverted();
7554   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
7555   // swap src<->dst for encoding
7556   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7557   emit_int8(0x59);
7558   emit_operand(dst, src);
7559 }
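
// Usage sketch (illustrative): the memory-sourced broadcasts read one element and
// replicate it across the destination, the usual way to splat a loop-invariant
// constant, e.g. a hypothetical
//   __ vpbroadcastd(xmm0, Address(rbx, 0), Assembler::AVX_512bit);
// loads a single dword and copies it into all sixteen dword lanes of zmm0.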
7560 
7561 void Assembler::evbroadcasti32x4(XMMRegister dst, Address src, int vector_len) {
7562   assert(vector_len != Assembler::AVX_128bit, "");
7563   assert(VM_Version::supports_avx512dq(), "");
7564   assert(dst != xnoreg, "sanity");
7565   InstructionMark im(this);
7566   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7567   attributes.set_rex_vex_w_reverted();
7568   attributes.set_address_attributes(/* tuple_type */ EVEX_T2, /* input_size_in_bits */ EVEX_64bit);
7569   // swap src<->dst for encoding
7570   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7571   emit_int8(0x5A);
7572   emit_operand(dst, src);
7573 }
7574 
7575 void Assembler::evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len) {
7576   assert(vector_len != Assembler::AVX_128bit, "");
7577   assert(VM_Version::supports_avx512dq(), "");
7578   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7579   attributes.set_rex_vex_w_reverted();
7580   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7581   emit_int16(0x5A, (0xC0 | encode));
7582 }
7583 
7584 void Assembler::evbroadcasti64x2(XMMRegister dst, Address src, int vector_len) {
7585   assert(vector_len != Assembler::AVX_128bit, "");
7586   assert(VM_Version::supports_avx512dq(), "");
7587   assert(dst != xnoreg, "sanity");
7588   InstructionMark im(this);
7589   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7590   attributes.set_rex_vex_w_reverted();
7591   attributes.set_address_attributes(/* tuple_type */ EVEX_T2, /* input_size_in_bits */ EVEX_64bit);
7592   // swap src<->dst for encoding
7593   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7594   emit_int8(0x5A);
7595   emit_operand(dst, src);
7596 }
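
// Note (illustrative): the x2/x4 broadcasts replicate a whole 128-bit block rather
// than a single element, so a hypothetical
//   __ evbroadcasti64x2(xmm0, Address(rax, 0), Assembler::AVX_512bit);
// loads 16 bytes and repeats them four times across zmm0.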
7597 
7598 // scalar single/double precision replicate
7599 
7600 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
7601 void Assembler::vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
7602   assert(VM_Version::supports_avx2(), "");
7603   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7604   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7605   emit_int16(0x18, (0xC0 | encode));
7606 }
7607 
7608 void Assembler::vbroadcastss(XMMRegister dst, Address src, int vector_len) {
7609   assert(VM_Version::supports_avx(), "");
7610   assert(dst != xnoreg, "sanity");
7611   InstructionMark im(this);
7612   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7613   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7614   // swap src<->dst for encoding
7615   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7616   emit_int8(0x18);
7617   emit_operand(dst, src);
7618 }
7619 
7620 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
7621 void Assembler::vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
7622   assert(VM_Version::supports_avx2(), "");
7623   assert(vector_len == AVX_256bit || vector_len == AVX_512bit, "");
7624   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7625   attributes.set_rex_vex_w_reverted();
7626   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7627   emit_int16(0x19, (0xC0 | encode));
7628 }
7629 
7630 void Assembler::vbroadcastsd(XMMRegister dst, Address src, int vector_len) {
7631   assert(VM_Version::supports_avx(), "");
7632   assert(vector_len == AVX_256bit || vector_len == AVX_512bit, "");
7633   assert(dst != xnoreg, "sanity");
7634   InstructionMark im(this);
7635   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7636   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
7637   attributes.set_rex_vex_w_reverted();
7638   // swap src<->dst for encoding
7639   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7640   emit_int8(0x19);
7641   emit_operand(dst, src);
7642 }
7643 
7644 
7645 // gpr source broadcast forms
7646 
7647 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7648 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
7649   assert(VM_Version::supports_avx512bw(), "");
7650   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7651   attributes.set_is_evex_instruction();
7652   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7653   emit_int16(0x7A, (0xC0 | encode));
7654 }
7655 
7656 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7657 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
7658   assert(VM_Version::supports_avx512bw(), "");
7659   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7660   attributes.set_is_evex_instruction();
7661   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7662   emit_int16(0x7B, (0xC0 | encode));
7663 }
7664 
7665 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
7666 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
7667   assert(VM_Version::supports_evex(), "");
7668   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7669   attributes.set_is_evex_instruction();
7670   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7671   emit_int16(0x7C, (0xC0 | encode));
7672 }
7673 
7674 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
7675 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
7676   assert(VM_Version::supports_evex(), "");
7677   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7678   attributes.set_is_evex_instruction();
7679   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7680   emit_int16(0x7C, (0xC0 | encode));
7681 }
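
// Sketch (illustrative): the GPR-sourced forms splat a general register directly,
// without bouncing through memory or an XMM register, e.g. a hypothetical
//   __ evpbroadcastd(xmm0, rax, Assembler::AVX_256bit);
// copies eax into every dword lane of ymm0.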
7682 
7683 void Assembler::vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
7684   assert(VM_Version::supports_avx2(), "");
7685   assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
7686   assert(dst != xnoreg, "sanity");
7687   assert(src.isxmmindex(),"expected to be xmm index");
7688   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7689   InstructionMark im(this);
7690   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
7691   vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7692   emit_int8((unsigned char)0x90);
7693   emit_operand(dst, src);
7694 }
7695 
7696 void Assembler::vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
7697   assert(VM_Version::supports_avx2(), "");
7698   assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
7699   assert(dst != xnoreg, "sanity");
7700   assert(src.isxmmindex(),"expected to be xmm index");
7701   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7702   InstructionMark im(this);
7703   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
7704   vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7705   emit_int8((unsigned char)0x90);
7706   emit_operand(dst, src);
7707 }
7708 
7709 void Assembler::vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
7710   assert(VM_Version::supports_avx2(), "");
7711   assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
7712   assert(dst != xnoreg, "sanity");
7713   assert(src.isxmmindex(),"expected to be xmm index");
7714   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7715   InstructionMark im(this);
7716   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
7717   vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7718   emit_int8((unsigned char)0x92);
7719   emit_operand(dst, src);
7720 }
7721 
7722 void Assembler::vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
7723   assert(VM_Version::supports_avx2(), "");
7724   assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
7725   assert(dst != xnoreg, "sanity");
7726   assert(src.isxmmindex(),"expected to be xmm index");
7727   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7728   InstructionMark im(this);
7729   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
7730   vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7731   emit_int8((unsigned char)0x92);
7732   emit_operand(dst, src);
7733 }
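
// Gather semantics sketch (illustrative): for every lane whose mask bit is set,
//   dst[i] = memory[base + index[i] * scale]
// and the AVX2 forms clear the xmm mask lane by lane as elements arrive, so the
// mask register is clobbered. A hypothetical VSIB-addressed call:
//   __ vpgatherdd(xmm0, Address(rax, xmm2, Address::times_4, 0), xmm3, Assembler::AVX_256bit);
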
7734 void Assembler::evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len) {
7735   assert(VM_Version::supports_evex(), "");
7736   assert(dst != xnoreg, "sanity");
7737   assert(src.isxmmindex(),"expected to be xmm index");
7738   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7739   assert(mask != k0, "instruction will #UD if mask is in k0");
7740   InstructionMark im(this);
7741   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7742   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7743   attributes.reset_is_clear_context();
7744   attributes.set_embedded_opmask_register_specifier(mask);
7745   attributes.set_is_evex_instruction();
7746   // swap src<->dst for encoding
7747   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7748   emit_int8((unsigned char)0x90);
7749   emit_operand(dst, src);
7750 }
7751 
7752 void Assembler::evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len) {
7753   assert(VM_Version::supports_evex(), "");
7754   assert(dst != xnoreg, "sanity");
7755   assert(src.isxmmindex(),"expected to be xmm index");
7756   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7757   assert(mask != k0, "instruction will #UD if mask is in k0");
7758   InstructionMark im(this);
7759   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7760   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7761   attributes.reset_is_clear_context();
7762   attributes.set_embedded_opmask_register_specifier(mask);
7763   attributes.set_is_evex_instruction();
7764   // swap src<->dst for encoding
7765   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7766   emit_int8((unsigned char)0x90);
7767   emit_operand(dst, src);
7768 }
7769 
7770 void Assembler::evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len) {
7771   assert(VM_Version::supports_evex(), "");
7772   assert(dst != xnoreg, "sanity");
7773   assert(src.isxmmindex(),"expected to be xmm index");
7774   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7775   assert(mask != k0, "instruction will #UD if mask is in k0");
7776   InstructionMark im(this);
7777   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7778   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7779   attributes.reset_is_clear_context();
7780   attributes.set_embedded_opmask_register_specifier(mask);
7781   attributes.set_is_evex_instruction();
7782   // swap src<->dst for encoding
7783   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7784   emit_int8((unsigned char)0x92);
7785   emit_operand(dst, src);
7786 }
7787 
7788 void Assembler::evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len) {
7789   assert(VM_Version::supports_evex(), "");
7790   assert(dst != xnoreg, "sanity");
7791   assert(src.isxmmindex(),"expected to be xmm index");
7792   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7793   assert(mask != k0, "instruction will #UD if mask is in k0");
7794   InstructionMark im(this);
7795   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7796   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7797   attributes.reset_is_clear_context();
7798   attributes.set_embedded_opmask_register_specifier(mask);
7799   attributes.set_is_evex_instruction();
7800   // swap src<->dst for encoding
7801   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7802   emit_int8((unsigned char)0x92);
7803   emit_operand(dst, src);
7804 }
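
// Note (illustrative): the EVEX gathers likewise consume their mask; opmask bits
// are cleared as lanes complete, which is why mask != k0 is asserted above.
// reset_is_clear_context() selects merge-masking, so lanes the mask skips keep
// their old dst value. A hypothetical call:
//   __ evpgatherdd(xmm0, k1, Address(rax, xmm2, Address::times_4, 0), Assembler::AVX_512bit);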
7805 
7806 void Assembler::evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len) {
7807   assert(VM_Version::supports_evex(), "");
7808   assert(mask != k0, "instruction will #UD if mask is in k0");
7809   InstructionMark im(this);
7810   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7811   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7812   attributes.reset_is_clear_context();
7813   attributes.set_embedded_opmask_register_specifier(mask);
7814   attributes.set_is_evex_instruction();
7815   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7816   emit_int8((unsigned char)0xA0);
7817   emit_operand(src, dst);
7818 }
7819 
7820 void Assembler::evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len) {
7821   assert(VM_Version::supports_evex(), "");
7822   assert(mask != k0, "instruction will #UD if mask is in k0");
7823   InstructionMark im(this);
7824   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); // qword elements, so T1S disp8 scales by 8
7826   attributes.reset_is_clear_context();
7827   attributes.set_embedded_opmask_register_specifier(mask);
7828   attributes.set_is_evex_instruction();
7829   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7830   emit_int8((unsigned char)0xA0);
7831   emit_operand(src, dst);
7832 }
7833 
7834 void Assembler::evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len) {
7835   assert(VM_Version::supports_evex(), "");
7836   assert(mask != k0, "instruction will #UD if mask is in k0");
7837   InstructionMark im(this);
7838   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7839   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7840   attributes.reset_is_clear_context();
7841   attributes.set_embedded_opmask_register_specifier(mask);
7842   attributes.set_is_evex_instruction();
7843   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7844   emit_int8((unsigned char)0xA2);
7845   emit_operand(src, dst);
7846 }
7847 
7848 void Assembler::evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len) {
7849   assert(VM_Version::supports_evex(), "");
7850   assert(mask != k0, "instruction will #UD if mask is in k0");
7851   InstructionMark im(this);
7852   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); // double elements, so T1S disp8 scales by 8
7854   attributes.reset_is_clear_context();
7855   attributes.set_embedded_opmask_register_specifier(mask);
7856   attributes.set_is_evex_instruction();
7857   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7858   emit_int8((unsigned char)0xA2);
7859   emit_operand(src, dst);
7860 }
7861 // Carry-Less Multiplication Quadword
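// The 'mask' immediate selects which 64-bit halves are multiplied:
// imm8[0] picks the low/high qword of the first source and imm8[4] that of
// the second, so 0x00 multiplies the two low qwords and 0x11 the two high ones.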
7862 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
7863   assert(VM_Version::supports_clmul(), "");
7864   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
7865   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7866   emit_int24(0x44, (0xC0 | encode), (unsigned char)mask);
7867 }
7868 
7869 // Carry-Less Multiplication Quadword
7870 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
7871   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
7872   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
7873   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7874   emit_int24(0x44, (0xC0 | encode), (unsigned char)mask);
7875 }
7876 
7877 void Assembler::evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len) {
7878   assert(VM_Version::supports_avx512_vpclmulqdq(), "Requires vector carryless multiplication support");
7879   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7880   attributes.set_is_evex_instruction();
7881   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7882   emit_int24(0x44, (0xC0 | encode), (unsigned char)mask);
7883 }
7884 
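// Zeroing the upper bits of the YMM/ZMM registers before executing legacy
// SSE code avoids the costly AVX-to-SSE transition penalty on Intel CPUs.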
7885 void Assembler::vzeroupper_uncached() {
7886   if (VM_Version::supports_vzeroupper()) {
7887     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7888     (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
7889     emit_int8(0x77);
7890   }
7891 }
7892 
7893 #ifndef _LP64
7894 // 32bit only pieces of the assembler
7895 
7896 void Assembler::emms() {
7897   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
7898   emit_int16(0x0F, 0x77);
7899 }
7900 
7901 void Assembler::vzeroupper() {
7902   vzeroupper_uncached();
7903 }
7904 
7905 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
7906   // NO PREFIX AS NEVER 64BIT
7907   InstructionMark im(this);
7908   emit_int16((unsigned char)0x81, (0xF8 | src1->encoding()));
7909   emit_data(imm32, rspec, 0);
7910 }
7911 
7912 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
7914   InstructionMark im(this);
7915   emit_int8((unsigned char)0x81);
7916   emit_operand(rdi, src1);
7917   emit_data(imm32, rspec, 0);
7918 }
7919 
// The 8-byte cmpxchg (cmpxchg8b, used on 32-bit platforms) compares the value at adr
// with the contents of rdx:rax and, if they are equal, stores rcx:rbx into adr;
// otherwise, the value at adr is loaded into rdx:rax. The ZF is set if the compared
// values were equal, and cleared otherwise.
7923 void Assembler::cmpxchg8(Address adr) {
7924   InstructionMark im(this);
7925   emit_int16(0x0F, (unsigned char)0xC7);
7926   emit_operand(rcx, adr);
7927 }
7928 
7929 void Assembler::decl(Register dst) {
7930   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_int8(0x48 | dst->encoding());
7932 }
7933 
7934 // 64bit doesn't use the x87
7935 
7936 void Assembler::emit_operand32(Register reg, Address adr) {
7937   assert(reg->encoding() < 8, "no extended registers");
7938   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
7939   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
7940                adr._rspec);
7941 }
7942 
7943 void Assembler::emit_farith(int b1, int b2, int i) {
7944   assert(isByte(b1) && isByte(b2), "wrong opcode");
7945   assert(0 <= i &&  i < 8, "illegal stack offset");
7946   emit_int16(b1, b2 + i);
7947 }
7948 
7949 void Assembler::fabs() {
7950   emit_int16((unsigned char)0xD9, (unsigned char)0xE1);
7951 }
7952 
7953 void Assembler::fadd(int i) {
7954   emit_farith(0xD8, 0xC0, i);
7955 }
7956 
7957 void Assembler::fadd_d(Address src) {
7958   InstructionMark im(this);
7959   emit_int8((unsigned char)0xDC);
7960   emit_operand32(rax, src);
7961 }
7962 
7963 void Assembler::fadd_s(Address src) {
7964   InstructionMark im(this);
7965   emit_int8((unsigned char)0xD8);
7966   emit_operand32(rax, src);
7967 }
7968 
7969 void Assembler::fadda(int i) {
7970   emit_farith(0xDC, 0xC0, i);
7971 }
7972 
7973 void Assembler::faddp(int i) {
7974   emit_farith(0xDE, 0xC0, i);
7975 }
7976 
7977 void Assembler::fchs() {
7978   emit_int16((unsigned char)0xD9, (unsigned char)0xE0);
7979 }
7980 
7981 void Assembler::fcom(int i) {
7982   emit_farith(0xD8, 0xD0, i);
7983 }
7984 
7985 void Assembler::fcomp(int i) {
7986   emit_farith(0xD8, 0xD8, i);
7987 }
7988 
7989 void Assembler::fcomp_d(Address src) {
7990   InstructionMark im(this);
7991   emit_int8((unsigned char)0xDC);
7992   emit_operand32(rbx, src);
7993 }
7994 
7995 void Assembler::fcomp_s(Address src) {
7996   InstructionMark im(this);
7997   emit_int8((unsigned char)0xD8);
7998   emit_operand32(rbx, src);
7999 }
8000 
8001 void Assembler::fcompp() {
8002   emit_int16((unsigned char)0xDE, (unsigned char)0xD9);
8003 }
8004 
8005 void Assembler::fcos() {
8006   emit_int16((unsigned char)0xD9, (unsigned char)0xFF);
8007 }
8008 
8009 void Assembler::fdecstp() {
8010   emit_int16((unsigned char)0xD9, (unsigned char)0xF6);
8011 }
8012 
8013 void Assembler::fdiv(int i) {
8014   emit_farith(0xD8, 0xF0, i);
8015 }
8016 
8017 void Assembler::fdiv_d(Address src) {
8018   InstructionMark im(this);
8019   emit_int8((unsigned char)0xDC);
8020   emit_operand32(rsi, src);
8021 }
8022 
8023 void Assembler::fdiv_s(Address src) {
8024   InstructionMark im(this);
8025   emit_int8((unsigned char)0xD8);
8026   emit_operand32(rsi, src);
8027 }
8028 
8029 void Assembler::fdiva(int i) {
8030   emit_farith(0xDC, 0xF8, i);
8031 }
8032 
8033 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
8034 //       is erroneous for some of the floating-point instructions below.
8035 
8036 void Assembler::fdivp(int i) {
8037   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
8038 }
8039 
8040 void Assembler::fdivr(int i) {
8041   emit_farith(0xD8, 0xF8, i);
8042 }
8043 
8044 void Assembler::fdivr_d(Address src) {
8045   InstructionMark im(this);
8046   emit_int8((unsigned char)0xDC);
8047   emit_operand32(rdi, src);
8048 }
8049 
8050 void Assembler::fdivr_s(Address src) {
8051   InstructionMark im(this);
8052   emit_int8((unsigned char)0xD8);
8053   emit_operand32(rdi, src);
8054 }
8055 
8056 void Assembler::fdivra(int i) {
8057   emit_farith(0xDC, 0xF0, i);
8058 }
8059 
8060 void Assembler::fdivrp(int i) {
8061   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
8062 }
8063 
8064 void Assembler::ffree(int i) {
8065   emit_farith(0xDD, 0xC0, i);
8066 }
8067 
8068 void Assembler::fild_d(Address adr) {
8069   InstructionMark im(this);
8070   emit_int8((unsigned char)0xDF);
8071   emit_operand32(rbp, adr);
8072 }
8073 
8074 void Assembler::fild_s(Address adr) {
8075   InstructionMark im(this);
8076   emit_int8((unsigned char)0xDB);
8077   emit_operand32(rax, adr);
8078 }
8079 
8080 void Assembler::fincstp() {
8081   emit_int16((unsigned char)0xD9, (unsigned char)0xF7);
8082 }
8083 
8084 void Assembler::finit() {
8085   emit_int24((unsigned char)0x9B, (unsigned char)0xDB, (unsigned char)0xE3);
8086 }
8087 
8088 void Assembler::fist_s(Address adr) {
8089   InstructionMark im(this);
8090   emit_int8((unsigned char)0xDB);
8091   emit_operand32(rdx, adr);
8092 }
8093 
8094 void Assembler::fistp_d(Address adr) {
8095   InstructionMark im(this);
8096   emit_int8((unsigned char)0xDF);
8097   emit_operand32(rdi, adr);
8098 }
8099 
8100 void Assembler::fistp_s(Address adr) {
8101   InstructionMark im(this);
8102   emit_int8((unsigned char)0xDB);
8103   emit_operand32(rbx, adr);
8104 }
8105 
8106 void Assembler::fld1() {
8107   emit_int16((unsigned char)0xD9, (unsigned char)0xE8);
8108 }
8109 
8110 void Assembler::fld_d(Address adr) {
8111   InstructionMark im(this);
8112   emit_int8((unsigned char)0xDD);
8113   emit_operand32(rax, adr);
8114 }
8115 
8116 void Assembler::fld_s(Address adr) {
8117   InstructionMark im(this);
8118   emit_int8((unsigned char)0xD9);
8119   emit_operand32(rax, adr);
8120 }
8121 
8122 
8123 void Assembler::fld_s(int index) {
8124   emit_farith(0xD9, 0xC0, index);
8125 }
8126 
8127 void Assembler::fld_x(Address adr) {
8128   InstructionMark im(this);
8129   emit_int8((unsigned char)0xDB);
8130   emit_operand32(rbp, adr);
8131 }
8132 
8133 void Assembler::fldcw(Address src) {
8134   InstructionMark im(this);
8135   emit_int8((unsigned char)0xD9);
8136   emit_operand32(rbp, src);
8137 }
8138 
8139 void Assembler::fldenv(Address src) {
8140   InstructionMark im(this);
8141   emit_int8((unsigned char)0xD9);
8142   emit_operand32(rsp, src);
8143 }
8144 
8145 void Assembler::fldlg2() {
8146   emit_int16((unsigned char)0xD9, (unsigned char)0xEC);
8147 }
8148 
8149 void Assembler::fldln2() {
8150   emit_int16((unsigned char)0xD9, (unsigned char)0xED);
8151 }
8152 
8153 void Assembler::fldz() {
8154   emit_int16((unsigned char)0xD9, (unsigned char)0xEE);
8155 }
8156 
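// flog() computes ln(x) via the identity ln(x) = ln(2) * log2(x): push the
// constant, swap it under x, and let fyl2x form ST(1) * log2(ST(0)) and pop.
// flog10() below does the same with log10(2).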
8157 void Assembler::flog() {
8158   fldln2();
8159   fxch();
8160   fyl2x();
8161 }
8162 
8163 void Assembler::flog10() {
8164   fldlg2();
8165   fxch();
8166   fyl2x();
8167 }
8168 
8169 void Assembler::fmul(int i) {
8170   emit_farith(0xD8, 0xC8, i);
8171 }
8172 
8173 void Assembler::fmul_d(Address src) {
8174   InstructionMark im(this);
8175   emit_int8((unsigned char)0xDC);
8176   emit_operand32(rcx, src);
8177 }
8178 
8179 void Assembler::fmul_s(Address src) {
8180   InstructionMark im(this);
8181   emit_int8((unsigned char)0xD8);
8182   emit_operand32(rcx, src);
8183 }
8184 
8185 void Assembler::fmula(int i) {
8186   emit_farith(0xDC, 0xC8, i);
8187 }
8188 
8189 void Assembler::fmulp(int i) {
8190   emit_farith(0xDE, 0xC8, i);
8191 }
8192 
8193 void Assembler::fnsave(Address dst) {
8194   InstructionMark im(this);
8195   emit_int8((unsigned char)0xDD);
8196   emit_operand32(rsi, dst);
8197 }
8198 
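// Note: despite the fnstcw name, this emits 0x9B (fwait) ahead of D9 /7,
// i.e. the waiting form fstcw.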
8199 void Assembler::fnstcw(Address src) {
8200   InstructionMark im(this);
8201   emit_int16((unsigned char)0x9B, (unsigned char)0xD9);
8202   emit_operand32(rdi, src);
8203 }
8204 
8205 void Assembler::fnstsw_ax() {
8206   emit_int16((unsigned char)0xDF, (unsigned char)0xE0);
8207 }
8208 
8209 void Assembler::fprem() {
8210   emit_int16((unsigned char)0xD9, (unsigned char)0xF8);
8211 }
8212 
8213 void Assembler::fprem1() {
8214   emit_int16((unsigned char)0xD9, (unsigned char)0xF5);
8215 }
8216 
8217 void Assembler::frstor(Address src) {
8218   InstructionMark im(this);
8219   emit_int8((unsigned char)0xDD);
8220   emit_operand32(rsp, src);
8221 }
8222 
8223 void Assembler::fsin() {
8224   emit_int16((unsigned char)0xD9, (unsigned char)0xFE);
8225 }
8226 
8227 void Assembler::fsqrt() {
8228   emit_int16((unsigned char)0xD9, (unsigned char)0xFA);
8229 }
8230 
8231 void Assembler::fst_d(Address adr) {
8232   InstructionMark im(this);
8233   emit_int8((unsigned char)0xDD);
8234   emit_operand32(rdx, adr);
8235 }
8236 
8237 void Assembler::fst_s(Address adr) {
8238   InstructionMark im(this);
8239   emit_int8((unsigned char)0xD9);
8240   emit_operand32(rdx, adr);
8241 }
8242 
8243 void Assembler::fstp_d(Address adr) {
8244   InstructionMark im(this);
8245   emit_int8((unsigned char)0xDD);
8246   emit_operand32(rbx, adr);
8247 }
8248 
8249 void Assembler::fstp_d(int index) {
8250   emit_farith(0xDD, 0xD8, index);
8251 }
8252 
8253 void Assembler::fstp_s(Address adr) {
8254   InstructionMark im(this);
8255   emit_int8((unsigned char)0xD9);
8256   emit_operand32(rbx, adr);
8257 }
8258 
8259 void Assembler::fstp_x(Address adr) {
8260   InstructionMark im(this);
8261   emit_int8((unsigned char)0xDB);
8262   emit_operand32(rdi, adr);
8263 }
8264 
8265 void Assembler::fsub(int i) {
8266   emit_farith(0xD8, 0xE0, i);
8267 }
8268 
8269 void Assembler::fsub_d(Address src) {
8270   InstructionMark im(this);
8271   emit_int8((unsigned char)0xDC);
8272   emit_operand32(rsp, src);
8273 }
8274 
8275 void Assembler::fsub_s(Address src) {
8276   InstructionMark im(this);
8277   emit_int8((unsigned char)0xD8);
8278   emit_operand32(rsp, src);
8279 }
8280 
8281 void Assembler::fsuba(int i) {
8282   emit_farith(0xDC, 0xE8, i);
8283 }
8284 
8285 void Assembler::fsubp(int i) {
8286   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
8287 }
8288 
8289 void Assembler::fsubr(int i) {
8290   emit_farith(0xD8, 0xE8, i);
8291 }
8292 
8293 void Assembler::fsubr_d(Address src) {
8294   InstructionMark im(this);
8295   emit_int8((unsigned char)0xDC);
8296   emit_operand32(rbp, src);
8297 }
8298 
8299 void Assembler::fsubr_s(Address src) {
8300   InstructionMark im(this);
8301   emit_int8((unsigned char)0xD8);
8302   emit_operand32(rbp, src);
8303 }
8304 
8305 void Assembler::fsubra(int i) {
8306   emit_farith(0xDC, 0xE0, i);
8307 }
8308 
8309 void Assembler::fsubrp(int i) {
8310   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
8311 }
8312 
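// fptan (D9 F2) pushes 1.0 on top of its tan(x) result, so ftan() follows it
// with fstp st(0) (DD D8) to discard the 1.0 and leave tan(x) in ST(0).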
8313 void Assembler::ftan() {
8314   emit_int32((unsigned char)0xD9, (unsigned char)0xF2, (unsigned char)0xDD, (unsigned char)0xD8);
8315 }
8316 
8317 void Assembler::ftst() {
8318   emit_int16((unsigned char)0xD9, (unsigned char)0xE4);
8319 }
8320 
8321 void Assembler::fucomi(int i) {
8322   // make sure the instruction is supported (introduced for P6, together with cmov)
8323   guarantee(VM_Version::supports_cmov(), "illegal instruction");
8324   emit_farith(0xDB, 0xE8, i);
8325 }
8326 
8327 void Assembler::fucomip(int i) {
8328   // make sure the instruction is supported (introduced for P6, together with cmov)
8329   guarantee(VM_Version::supports_cmov(), "illegal instruction");
8330   emit_farith(0xDF, 0xE8, i);
8331 }
8332 
8333 void Assembler::fwait() {
8334   emit_int8((unsigned char)0x9B);
8335 }
8336 
8337 void Assembler::fxch(int i) {
8338   emit_farith(0xD9, 0xC8, i);
8339 }
8340 
8341 void Assembler::fyl2x() {
8342   emit_int16((unsigned char)0xD9, (unsigned char)0xF1);
8343 }
8344 
8345 void Assembler::frndint() {
8346   emit_int16((unsigned char)0xD9, (unsigned char)0xFC);
8347 }
8348 
8349 void Assembler::f2xm1() {
8350   emit_int16((unsigned char)0xD9, (unsigned char)0xF0);
8351 }
8352 
8353 void Assembler::fldl2e() {
8354   emit_int16((unsigned char)0xD9, (unsigned char)0xEA);
8355 }
8356 #endif // !_LP64
8357 
8358 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
8359 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
8360 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
8361 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
8362 
8363 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
8364 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
8365   if (pre > 0) {
8366     emit_int8(simd_pre[pre]);
8367   }
8368   if (rex_w) {
8369     prefixq(adr, xreg);
8370   } else {
8371     prefix(adr, xreg);
8372   }
8373   if (opc > 0) {
8374     emit_int8(0x0F);
8375     int opc2 = simd_opc[opc];
8376     if (opc2 > 0) {
8377       emit_int8(opc2);
8378     }
8379   }
8380 }
8381 
8382 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
8383   if (pre > 0) {
8384     emit_int8(simd_pre[pre]);
8385   }
8386   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
8387   if (opc > 0) {
8388     emit_int8(0x0F);
8389     int opc2 = simd_opc[opc];
8390     if (opc2 > 0) {
8391       emit_int8(opc2);
8392     }
8393   }
8394   return encode;
8395 }
8396 
8397 
8398 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
8399   int vector_len = _attributes->get_vector_len();
8400   bool vex_w = _attributes->is_rex_vex_w();
8401   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
8402     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
8403     byte1 = (~byte1) & 0xE0;
8404     byte1 |= opc;
8405 
8406     int byte2 = ((~nds_enc) & 0xf) << 3;
8407     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
8408 
8409     emit_int24((unsigned char)VEX_3bytes, byte1, byte2);
8410   } else {
8411     int byte1 = vex_r ? VEX_R : 0;
8412     byte1 = (~byte1) & 0x80;
8413     byte1 |= ((~nds_enc) & 0xf) << 3;
8414     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
8415     emit_int16((unsigned char)VEX_2bytes, byte1);
8416   }
8417 }
8418 
8419 // This is a 4 byte encoding
8420 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
  // The EVEX prefix is the 0x62 escape byte (EVEX_4bytes) followed by the three
  // payload bytes P0..P2 assembled below and emitted together at the end.
8423 
8424   bool vex_w = _attributes->is_rex_vex_w();
8425   int evex_encoding = (vex_w ? VEX_W : 0);
8426   // EVEX.b is not currently used for broadcast of single element or data rounding modes
8427   _attributes->set_evex_encoding(evex_encoding);
8428 
  // P0: byte 2, laid out as RXBR`00mm
  // (the R/X/B/R` extension bits are stored inverted, hence the ~ below)
8431   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
8432   byte2 = (~byte2) & 0xF0;
8433   // confine opc opcode extensions in mm bits to lower two bits
8434   // of form {0F, 0F_38, 0F_3A}
8435   byte2 |= opc;
8436 
8437   // P1: byte 3 as Wvvvv1pp
8438   int byte3 = ((~nds_enc) & 0xf) << 3;
8439   // p[10] is always 1
8440   byte3 |= EVEX_F;
8441   byte3 |= (vex_w & 1) << 7;
8442   // confine pre opcode extensions in pp bits to lower two bits
8443   // of form {66, F3, F2}
8444   byte3 |= pre;
8445 
8446   // P2: byte 4 as zL'Lbv'aaa
8447   // kregs are implemented in the low 3 bits as aaa
8448   int byte4 = (_attributes->is_no_reg_mask()) ?
8449               0 :
8450               _attributes->get_embedded_opmask_register_specifier();
  // EVEX.v` for extending EVEX.vvvv or VIDX
  byte4 |= (evex_v ? 0 : EVEX_V);
  // third is EVEX.b for broadcast actions
  byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
  // fourth is EVEX.L'L for vector length: 0 is 128, 1 is 256, 2 is 512 (1024 is not currently supported)
  byte4 |= ((_attributes->get_vector_len()) & 0x3) << 5;
8457   // last is EVEX.z for zero/merge actions
8458   if (_attributes->is_no_reg_mask() == false &&
8459       _attributes->get_embedded_opmask_register_specifier() != 0) {
8460     byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
8461   }
8462 
8463   emit_int32(EVEX_4bytes, byte2, byte3, byte4);
8464 }
8465 
8466 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
8467   bool vex_r = (xreg_enc & 8) == 8;
8468   bool vex_b = adr.base_needs_rex();
8469   bool vex_x;
8470   if (adr.isxmmindex()) {
8471     vex_x = adr.xmmindex_needs_rex();
8472   } else {
8473     vex_x = adr.index_needs_rex();
8474   }
8475   set_attributes(attributes);
8476   attributes->set_current_assembler(this);
8477 
  // For an EVEX-capable instruction that is not marked as a pure EVEX instruction,
  // check whether it is allowed in legacy mode, i.e. whether its operands fit within
  // the legacy register set.
  // Pure EVEX instructions will have is_evex_instruction set in their definition.
8481   if (!attributes->is_legacy_mode()) {
8482     if (UseAVX > 2 && !attributes->is_evex_instruction() && !is_managed()) {
8483       if ((attributes->get_vector_len() != AVX_512bit) && (nds_enc < 16) && (xreg_enc < 16)) {
        attributes->set_is_legacy_mode();
8485       }
8486     }
8487   }
8488 
8489   if (UseAVX > 2) {
8490     assert(((!attributes->uses_vl()) ||
8491             (attributes->get_vector_len() == AVX_512bit) ||
8492             (!_legacy_mode_vl) ||
8493             (attributes->is_legacy_mode())),"XMM register should be 0-15");
8494     assert(((nds_enc < 16 && xreg_enc < 16) || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
8495   }
8496 
8497   clear_managed();
  if (UseAVX > 2 && !attributes->is_legacy_mode()) {
8500     bool evex_r = (xreg_enc >= 16);
8501     bool evex_v;
8502     // EVEX.V' is set to true when VSIB is used as we may need to use higher order XMM registers (16-31)
8503     if (adr.isxmmindex())  {
      evex_v = (adr._xmmindex->encoding() > 15);
8505     } else {
8506       evex_v = (nds_enc >= 16);
8507     }
8508     attributes->set_is_evex_instruction();
8509     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
8510   } else {
8511     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
8512       attributes->set_rex_vex_w(false);
8513     }
8514     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
8515   }
8516 }
8517 
8518 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
8519   bool vex_r = (dst_enc & 8) == 8;
8520   bool vex_b = (src_enc & 8) == 8;
8521   bool vex_x = false;
8522   set_attributes(attributes);
8523   attributes->set_current_assembler(this);
8524 
  // For an EVEX-capable instruction that is not marked as a pure EVEX instruction,
  // check whether it is allowed in legacy mode, i.e. whether its operands fit within
  // the legacy register set.
  // Pure EVEX instructions will have is_evex_instruction set in their definition.
8528   if (!attributes->is_legacy_mode()) {
8529     if (UseAVX > 2 && !attributes->is_evex_instruction() && !is_managed()) {
8530       if ((!attributes->uses_vl() || (attributes->get_vector_len() != AVX_512bit)) &&
8531           (dst_enc < 16) && (nds_enc < 16) && (src_enc < 16)) {
        attributes->set_is_legacy_mode();
8533       }
8534     }
8535   }
8536 
8537   if (UseAVX > 2) {
    // All the scalar fp instructions (with uses_vl as false) can have legacy_mode as false
    // Instructions with uses_vl true are vector instructions
    // All the vector instructions with AVX_512bit length can have legacy_mode as false
    // All the vector instructions with < AVX_512bit length can have legacy_mode as false if AVX512vl is supported
    // All others should have legacy_mode set to true
8543     assert(((!attributes->uses_vl()) ||
8544             (attributes->get_vector_len() == AVX_512bit) ||
8545             (!_legacy_mode_vl) ||
8546             (attributes->is_legacy_mode())),"XMM register should be 0-15");
    // Instructions with legacy_mode true should have dst, nds and src < 16
8548     assert(((dst_enc < 16 && nds_enc < 16 && src_enc < 16) || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
8549   }
8550 
8551   clear_managed();
  if (UseAVX > 2 && !attributes->is_legacy_mode()) {
8554     bool evex_r = (dst_enc >= 16);
8555     bool evex_v = (nds_enc >= 16);
8556     // can use vex_x as bank extender on rm encoding
8557     vex_x = (src_enc >= 16);
8558     attributes->set_is_evex_instruction();
8559     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
8560   } else {
8561     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
8562       attributes->set_rex_vex_w(false);
8563     }
8564     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
8565   }
8566 
8567   // return modrm byte components for operands
8568   return (((dst_enc & 7) << 3) | (src_enc & 7));
8569 }
8570 
8571 
8572 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
8573                             VexOpcode opc, InstructionAttr *attributes) {
8574   if (UseAVX > 0) {
8575     int xreg_enc = xreg->encoding();
8576     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
8577     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
8578   } else {
8579     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
8580     rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
8581   }
8582 }
8583 
8584 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
8585                                       VexOpcode opc, InstructionAttr *attributes) {
8586   int dst_enc = dst->encoding();
8587   int src_enc = src->encoding();
8588   if (UseAVX > 0) {
8589     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
8590     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
8591   } else {
8592     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
8593     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
8594   }
8595 }
8596 
8597 void Assembler::vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
8598   assert(VM_Version::supports_avx(), "");
8599   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8600   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8601   emit_int16(0x5F, (0xC0 | encode));
8602 }
8603 
8604 void Assembler::vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
8605   assert(VM_Version::supports_avx(), "");
8606   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8607   attributes.set_rex_vex_w_reverted();
8608   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8609   emit_int16(0x5F, (0xC0 | encode));
8610 }
8611 
8612 void Assembler::vminss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
8613   assert(VM_Version::supports_avx(), "");
8614   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8615   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8616   emit_int16(0x5D, (0xC0 | encode));
8617 }
8618 
8619 void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
8620   assert(VM_Version::supports_avx(), "");
8621   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8622   attributes.set_rex_vex_w_reverted();
8623   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8624   emit_int16(0x5D, (0xC0 | encode));
8625 }
8626 
8627 void Assembler::vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
8628   assert(VM_Version::supports_avx(), "");
8629   assert(vector_len <= AVX_256bit, "");
8630   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8631   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8632   emit_int24((unsigned char)0xC2, (0xC0 | encode), (0xF & cop));
8633 }
8634 
8635 void Assembler::blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
8636   assert(VM_Version::supports_avx(), "");
8637   assert(vector_len <= AVX_256bit, "");
8638   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8639   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8640   int src2_enc = src2->encoding();
8641   emit_int24(0x4C, (0xC0 | encode), (0xF0 & src2_enc << 4));
8642 }
8643 
8644 void Assembler::vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
8645   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
8646   assert(vector_len <= AVX_256bit, "");
8647   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8648   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8649   int src2_enc = src2->encoding();
8650   emit_int24(0x4B, (0xC0 | encode), (0xF0 & src2_enc << 4));
8651 }
8652 
8653 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
8654   assert(VM_Version::supports_avx2(), "");
8655   assert(vector_len <= AVX_256bit, "");
8656   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8657   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8658   emit_int24(0x02, (0xC0 | encode), (unsigned char)imm8);
8659 }
8660 
8661 void Assembler::vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len) {
8662   assert(VM_Version::supports_avx(), "");
8663   assert(vector_len <= AVX_256bit, "");
8664   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8665   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
8666   emit_int24((unsigned char)0xC2, (0xC0 | encode), (unsigned char)comparison);
8667 }
8668 
8669 void Assembler::evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8670                         ComparisonPredicateFP comparison, int vector_len) {
8671   assert(VM_Version::supports_evex(), "");
8672   // Encoding: EVEX.NDS.XXX.0F.W0 C2 /r ib
8673   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8674   attributes.set_is_evex_instruction();
8675   attributes.set_embedded_opmask_register_specifier(mask);
8676   attributes.reset_is_clear_context();
8677   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
8678   emit_int24((unsigned char)0xC2, (0xC0 | encode), comparison);
8679 }
8680 
8681 void Assembler::evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8682                         ComparisonPredicateFP comparison, int vector_len) {
8683   assert(VM_Version::supports_evex(), "");
8684   // Encoding: EVEX.NDS.XXX.66.0F.W1 C2 /r ib
8685   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8686   attributes.set_is_evex_instruction();
8687   attributes.set_embedded_opmask_register_specifier(mask);
8688   attributes.reset_is_clear_context();
8689   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8690   emit_int24((unsigned char)0xC2, (0xC0 | encode), comparison);
8691 }
8692 
8693 void Assembler::blendvps(XMMRegister dst, XMMRegister src) {
8694   assert(VM_Version::supports_sse4_1(), "");
8695   assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
8696   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8697   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8698   emit_int16(0x14, (0xC0 | encode));
8699 }
8700 
8701 void Assembler::blendvpd(XMMRegister dst, XMMRegister src) {
8702   assert(VM_Version::supports_sse4_1(), "");
8703   assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
8704   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8705   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8706   emit_int16(0x15, (0xC0 | encode));
8707 }
8708 
8709 void Assembler::pblendvb(XMMRegister dst, XMMRegister src) {
8710   assert(VM_Version::supports_sse4_1(), "");
8711   assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
8712   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8713   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8714   emit_int16(0x10, (0xC0 | encode));
8715 }
8716 
8717 void Assembler::vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
8718   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
8719   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8720   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8721   int src2_enc = src2->encoding();
8722   emit_int24(0x4A, (0xC0 | encode), (0xF0 & src2_enc << 4));
8723 }
8724 
8725 void Assembler::vblendps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
8726   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8727   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8728   emit_int24(0x0C, (0xC0 | encode), imm8);
8729 }
8730 
8731 void Assembler::vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8732   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8733   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8734   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8735   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8736   emit_int16(0x64, (0xC0 | encode));
8737 }
8738 
8739 void Assembler::vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8740   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8741   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8742   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8743   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8744   emit_int16(0x65, (0xC0 | encode));
8745 }
8746 
8747 void Assembler::vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8748   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8749   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8750   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8751   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8752   emit_int16(0x66, (0xC0 | encode));
8753 }
8754 
8755 void Assembler::vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8756   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8757   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8758   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8759   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8760   emit_int16(0x37, (0xC0 | encode));
8761 }
8762 
8763 void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8764                         int comparison, int vector_len) {
8765   assert(VM_Version::supports_evex(), "");
8766   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8767   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
8768   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8769   attributes.set_is_evex_instruction();
8770   attributes.set_embedded_opmask_register_specifier(mask);
8771   attributes.reset_is_clear_context();
8772   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8773   emit_int24(0x1F, (0xC0 | encode), comparison);
8774 }
8775 
8776 void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8777                         int comparison, int vector_len) {
8778   assert(VM_Version::supports_evex(), "");
8779   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8780   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
8781   InstructionMark im(this);
8782   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8783   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
8784   attributes.set_is_evex_instruction();
8785   attributes.set_embedded_opmask_register_specifier(mask);
8786   attributes.reset_is_clear_context();
8787   int dst_enc = kdst->encoding();
8788   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8789   emit_int8((unsigned char)0x1F);
8790   emit_operand(as_Register(dst_enc), src);
8791   emit_int8((unsigned char)comparison);
8792 }
8793 
8794 void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8795                         int comparison, int vector_len) {
8796   assert(VM_Version::supports_evex(), "");
8797   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8798   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
8799   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8800   attributes.set_is_evex_instruction();
8801   attributes.set_embedded_opmask_register_specifier(mask);
8802   attributes.reset_is_clear_context();
8803   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8804   emit_int24(0x1F, (0xC0 | encode), comparison);
8805 }
8806 
8807 void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8808                         int comparison, int vector_len) {
8809   assert(VM_Version::supports_evex(), "");
8810   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8811   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
8812   InstructionMark im(this);
8813   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8814   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
8815   attributes.set_is_evex_instruction();
8816   attributes.set_embedded_opmask_register_specifier(mask);
8817   attributes.reset_is_clear_context();
8818   int dst_enc = kdst->encoding();
8819   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8820   emit_int8((unsigned char)0x1F);
8821   emit_operand(as_Register(dst_enc), src);
8822   emit_int8((unsigned char)comparison);
8823 }
8824 
8825 void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8826                         int comparison, int vector_len) {
8827   assert(VM_Version::supports_evex(), "");
8828   assert(VM_Version::supports_avx512bw(), "");
8829   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8830   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
8831   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8832   attributes.set_is_evex_instruction();
8833   attributes.set_embedded_opmask_register_specifier(mask);
8834   attributes.reset_is_clear_context();
8835   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8836   emit_int24(0x3F, (0xC0 | encode), comparison);
8837 }
8838 
8839 void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8840                         int comparison, int vector_len) {
8841   assert(VM_Version::supports_evex(), "");
8842   assert(VM_Version::supports_avx512bw(), "");
8843   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8844   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
8845   InstructionMark im(this);
8846   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8847   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
8848   attributes.set_is_evex_instruction();
8849   attributes.set_embedded_opmask_register_specifier(mask);
8850   attributes.reset_is_clear_context();
8851   int dst_enc = kdst->encoding();
8852   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8853   emit_int8((unsigned char)0x3F);
8854   emit_operand(as_Register(dst_enc), src);
8855   emit_int8((unsigned char)comparison);
8856 }
8857 
8858 void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8859                         int comparison, int vector_len) {
8860   assert(VM_Version::supports_evex(), "");
8861   assert(VM_Version::supports_avx512bw(), "");
8862   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8863   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
8864   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8865   attributes.set_is_evex_instruction();
8866   attributes.set_embedded_opmask_register_specifier(mask);
8867   attributes.reset_is_clear_context();
8868   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8869   emit_int24(0x3F, (0xC0 | encode), comparison);
8870 }
8871 
8872 void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8873                         int comparison, int vector_len) {
8874   assert(VM_Version::supports_evex(), "");
8875   assert(VM_Version::supports_avx512bw(), "");
8876   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8877   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
8878   InstructionMark im(this);
8879   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8880   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
8881   attributes.set_is_evex_instruction();
8882   attributes.set_embedded_opmask_register_specifier(mask);
8883   attributes.reset_is_clear_context();
8884   int dst_enc = kdst->encoding();
8885   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8886   emit_int8((unsigned char)0x3F);
8887   emit_operand(as_Register(dst_enc), src);
8888   emit_int8((unsigned char)comparison);
8889 }
8890 
8891 void Assembler::vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len) {
8892   assert(VM_Version::supports_avx(), "");
8893   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8894   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8895   int mask_enc = mask->encoding();
  emit_int24(0x4C, (0xC0 | encode), (0xF0 & mask_enc << 4));
8897 }
8898 
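// For the evblendm* / evpblendm* family below, 'merge' selects the masking
// behavior encoded in EVEX.z: with merge true, clear-context is reset so
// unselected lanes keep the destination's prior contents; otherwise EVEX.z
// zeroes them.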
8899 void Assembler::evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8900   assert(VM_Version::supports_evex(), "");
8901   // Encoding: EVEX.NDS.XXX.66.0F38.W1 65 /r
8902   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8903   attributes.set_is_evex_instruction();
8904   attributes.set_embedded_opmask_register_specifier(mask);
8905   if (merge) {
8906     attributes.reset_is_clear_context();
8907   }
8908   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8909   emit_int16(0x65, (0xC0 | encode));
8910 }
8911 
8912 void Assembler::evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8913   assert(VM_Version::supports_evex(), "");
8914   // Encoding: EVEX.NDS.XXX.66.0F38.W0 65 /r
8915   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8916   attributes.set_is_evex_instruction();
8917   attributes.set_embedded_opmask_register_specifier(mask);
8918   if (merge) {
8919     attributes.reset_is_clear_context();
8920   }
8921   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8922   emit_int16(0x65, (0xC0 | encode));
8923 }
8924 
8925 void Assembler::evpblendmb (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8926   assert(VM_Version::supports_evex(), "");
8927   assert(VM_Version::supports_avx512bw(), "");
8928   // Encoding: EVEX.NDS.512.66.0F38.W0 66 /r
8929   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8930   attributes.set_is_evex_instruction();
8931   attributes.set_embedded_opmask_register_specifier(mask);
8932   if (merge) {
8933     attributes.reset_is_clear_context();
8934   }
8935   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8936   emit_int16(0x66, (0xC0 | encode));
8937 }
8938 
8939 void Assembler::evpblendmw (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8940   assert(VM_Version::supports_evex(), "");
8941   assert(VM_Version::supports_avx512bw(), "");
8942   // Encoding: EVEX.NDS.512.66.0F38.W1 66 /r
8943   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8944   attributes.set_is_evex_instruction();
8945   attributes.set_embedded_opmask_register_specifier(mask);
8946   if (merge) {
8947     attributes.reset_is_clear_context();
8948   }
8949   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8950   emit_int16(0x66, (0xC0 | encode));
8951 }
8952 
8953 void Assembler::evpblendmd (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8954   assert(VM_Version::supports_evex(), "");
8955   //Encoding: EVEX.NDS.512.66.0F38.W0 64 /r
8956   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8957   attributes.set_is_evex_instruction();
8958   attributes.set_embedded_opmask_register_specifier(mask);
8959   if (merge) {
8960     attributes.reset_is_clear_context();
8961   }
8962   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8963   emit_int16(0x64, (0xC0 | encode));
8964 }
8965 
8966 void Assembler::evpblendmq (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8967   assert(VM_Version::supports_evex(), "");
8968   //Encoding: EVEX.NDS.512.66.0F38.W1 64 /r
8969   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8970   attributes.set_is_evex_instruction();
8971   attributes.set_embedded_opmask_register_specifier(mask);
8972   if (merge) {
8973     attributes.reset_is_clear_context();
8974   }
8975   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8976   emit_int16(0x64, (0xC0 | encode));
8977 }
8978 
8979 void Assembler::shlxl(Register dst, Register src1, Register src2) {
8980   assert(VM_Version::supports_bmi2(), "");
8981   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8982   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8983   emit_int16((unsigned char)0xF7, (0xC0 | encode));
8984 }
8985 
8986 void Assembler::shlxq(Register dst, Register src1, Register src2) {
8987   assert(VM_Version::supports_bmi2(), "");
8988   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8989   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8990   emit_int16((unsigned char)0xF7, (0xC0 | encode));
8991 }
8992 
8993 #ifndef _LP64
8994 
8995 void Assembler::incl(Register dst) {
8996   // Don't use it directly. Use MacroAssembler::incrementl() instead.
8997   emit_int8(0x40 | dst->encoding());
8998 }
8999 
9000 void Assembler::lea(Register dst, Address src) {
9001   leal(dst, src);
9002 }
9003 
9004 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
9005   InstructionMark im(this);
9006   emit_int8((unsigned char)0xC7);
9007   emit_operand(rax, dst);
9008   emit_data((int)imm32, rspec, 0);
9009 }
9010 
9011 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
9012   InstructionMark im(this);
9013   int encode = prefix_and_encode(dst->encoding());
9014   emit_int8((0xB8 | encode));
9015   emit_data((int)imm32, rspec, 0);
9016 }
9017 
9018 void Assembler::popa() { // 32bit
9019   emit_int8(0x61);
9020 }
9021 
9022 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
9023   InstructionMark im(this);
9024   emit_int8(0x68);
9025   emit_data(imm32, rspec, 0);
9026 }
9027 
9028 void Assembler::pusha() { // 32bit
9029   emit_int8(0x60);
9030 }
9031 
9032 void Assembler::set_byte_if_not_zero(Register dst) {
9033   emit_int24(0x0F, (unsigned char)0x95, (0xE0 | dst->encoding()));
9034 }
9035 
9036 #else // LP64
9037 
9038 void Assembler::set_byte_if_not_zero(Register dst) {
9039   int enc = prefix_and_encode(dst->encoding(), true);
9040   emit_int24(0x0F, (unsigned char)0x95, (0xE0 | enc));
9041 }
9042 
9043 // 64bit only pieces of the assembler
9044 // This should only be used by 64bit instructions that can use rip-relative
9045 // it cannot be used by instructions that want an immediate value.
9046 
9047 bool Assembler::reachable(AddressLiteral adr) {
9048   int64_t disp;
9049   relocInfo::relocType relocType = adr.reloc();
9050 
  // A relocType of none will force a 64bit literal to the code stream. It is likely
  // a placeholder for something that will be patched later, and we need to be certain
  // it will always be reachable.
9054   if (relocType == relocInfo::none) {
9055     return false;
9056   }
9057   if (relocType == relocInfo::internal_word_type) {
9058     // This should be rip relative and easily reachable.
9059     return true;
9060   }
9061   if (relocType == relocInfo::virtual_call_type ||
9062       relocType == relocInfo::opt_virtual_call_type ||
9063       relocType == relocInfo::static_call_type ||
9064       relocType == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches (at which point
    // inline-cache code is going to have issues).
9068     return true;
9069   }
9070   if (relocType != relocInfo::external_word_type &&
9071       relocType != relocInfo::poll_return_type &&  // these are really external_word but need special
9072       relocType != relocInfo::poll_type &&         // relocs to identify them
9073       relocType != relocInfo::runtime_call_type ) {
9074     return false;
9075   }
9076 
9077   // Stress the correction code
9078   if (ForceUnreachable) {
    // Must be a runtime_call reloc; see if it is in the codecache.
9080     // Flipping stuff in the codecache to be unreachable causes issues
9081     // with things like inline caches where the additional instructions
9082     // are not handled.
9083     if (CodeCache::find_blob(adr._target) == NULL) {
9084       return false;
9085     }
9086   }
  // For external_word_type/runtime_call_type, if the target is reachable both from
  // where we are now (possibly a temp buffer) and from anywhere we might end up in
  // the codeCache, then we are always reachable.
  // This would have to change, to be more pessimistic, if we ever save/restore
  // shared code.
9092   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
9093   if (!is_simm32(disp)) return false;
9094   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
9095   if (!is_simm32(disp)) return false;
9096 
9097   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
9098 
  // Because rip-relative addressing is disp + address_of_next_instruction, and
  // we don't know the value of address_of_next_instruction, we apply a fudge
  // factor to make sure we will be ok no matter the size of the instruction
  // this displacement gets placed into.
  // We don't have to fudge the checks above because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, 4-byte literal
  // + 4 because better safe than sorry.
9106   const int fudge = 12 + 4;
9107   if (disp < 0) {
9108     disp -= fudge;
9109   } else {
9110     disp += fudge;
9111   }
9112   return is_simm32(disp);
9113 }
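
// Worked example of the fudge correction (illustrative arithmetic, not part of
// the original code): a raw disp of 0x7FFFFFF0 passes is_simm32, but
// 0x7FFFFFF0 + 16 == 0x80000000 > INT32_MAX, so reachable() conservatively
// returns false and the caller must materialize a 64bit literal instead of a
// rip-relative form.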
9114 
9115 void Assembler::emit_data64(jlong data,
9116                             relocInfo::relocType rtype,
9117                             int format) {
9118   if (rtype == relocInfo::none) {
9119     emit_int64(data);
9120   } else {
9121     emit_data64(data, Relocation::spec_simple(rtype), format);
9122   }
9123 }
9124 
9125 void Assembler::emit_data64(jlong data,
9126                             RelocationHolder const& rspec,
9127                             int format) {
9128   assert(imm_operand == 0, "default format must be immediate in this file");
9129   assert(imm_operand == format, "must be immediate");
9130   assert(inst_mark() != NULL, "must be inside InstructionMark");
9131   // Do not use AbstractAssembler::relocate, which is not intended for
9132   // embedded words.  Instead, relocate to the enclosing instruction.
9133   code_section()->relocate(inst_mark(), rspec, format);
9134 #ifdef ASSERT
9135   check_relocation(rspec, format);
9136 #endif
9137   emit_int64(data);
9138 }
9139 
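// A brief recap of the REX prefix layout assumed by the helpers below
// (standard x86-64 encoding, noted here for readability): REX is 0100WRXB,
// i.e. base byte 0x40, with W (0x08) selecting 64-bit operand size, R (0x04)
// extending ModRM.reg, X (0x02) extending SIB.index, and B (0x01) extending
// ModRM.rm/SIB.base. For example:
//
//   prefix(r9);   // r9 has encoding 9 >= 8, so this emits REX_B == 0x41
//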
9140 void Assembler::prefix(Register reg) {
9141   if (reg->encoding() >= 8) {
9142     prefix(REX_B);
9143   }
9144 }
9145 
9146 void Assembler::prefix(Register dst, Register src, Prefix p) {
9147   if (src->encoding() >= 8) {
9148     p = (Prefix)(p | REX_B);
9149   }
9150   if (dst->encoding() >= 8) {
9151     p = (Prefix)(p | REX_R);
9152   }
9153   if (p != Prefix_EMPTY) {
9154     // do not generate an empty prefix
9155     prefix(p);
9156   }
9157 }
9158 
9159 void Assembler::prefix(Register dst, Address adr, Prefix p) {
9160   if (adr.base_needs_rex()) {
9161     if (adr.index_needs_rex()) {
9162       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
9163     } else {
9164       prefix(REX_B);
9165     }
9166   } else {
9167     if (adr.index_needs_rex()) {
9168       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
9169     }
9170   }
9171   if (dst->encoding() >= 8) {
9172     p = (Prefix)(p | REX_R);
9173   }
9174   if (p != Prefix_EMPTY) {
9175     // do not generate an empty prefix
9176     prefix(p);
9177   }
9178 }
9179 
9180 void Assembler::prefix(Address adr) {
9181   if (adr.base_needs_rex()) {
9182     if (adr.index_needs_rex()) {
9183       prefix(REX_XB);
9184     } else {
9185       prefix(REX_B);
9186     }
9187   } else {
9188     if (adr.index_needs_rex()) {
9189       prefix(REX_X);
9190     }
9191   }
9192 }
9193 
9194 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
9195   if (reg->encoding() < 8) {
9196     if (adr.base_needs_rex()) {
9197       if (adr.index_needs_rex()) {
9198         prefix(REX_XB);
9199       } else {
9200         prefix(REX_B);
9201       }
9202     } else {
9203       if (adr.index_needs_rex()) {
9204         prefix(REX_X);
9205       } else if (byteinst && reg->encoding() >= 4) {
9206         prefix(REX);
9207       }
9208     }
9209   } else {
9210     if (adr.base_needs_rex()) {
9211       if (adr.index_needs_rex()) {
9212         prefix(REX_RXB);
9213       } else {
9214         prefix(REX_RB);
9215       }
9216     } else {
9217       if (adr.index_needs_rex()) {
9218         prefix(REX_RX);
9219       } else {
9220         prefix(REX_R);
9221       }
9222     }
9223   }
9224 }
9225 
9226 void Assembler::prefix(Address adr, XMMRegister reg) {
9227   if (reg->encoding() < 8) {
9228     if (adr.base_needs_rex()) {
9229       if (adr.index_needs_rex()) {
9230         prefix(REX_XB);
9231       } else {
9232         prefix(REX_B);
9233       }
9234     } else {
9235       if (adr.index_needs_rex()) {
9236         prefix(REX_X);
9237       }
9238     }
9239   } else {
9240     if (adr.base_needs_rex()) {
9241       if (adr.index_needs_rex()) {
9242         prefix(REX_RXB);
9243       } else {
9244         prefix(REX_RB);
9245       }
9246     } else {
9247       if (adr.index_needs_rex()) {
9248         prefix(REX_RX);
9249       } else {
9250         prefix(REX_R);
9251       }
9252     }
9253   }
9254 }
9255 
9256 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
9257   if (reg_enc >= 8) {
9258     prefix(REX_B);
9259     reg_enc -= 8;
9260   } else if (byteinst && reg_enc >= 4) {
9261     prefix(REX);
9262   }
9263   return reg_enc;
9264 }
9265 
9266 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
9267   if (dst_enc < 8) {
9268     if (src_enc >= 8) {
9269       prefix(REX_B);
9270       src_enc -= 8;
9271     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
9272       prefix(REX);
9273     }
9274   } else {
9275     if (src_enc < 8) {
9276       prefix(REX_R);
9277     } else {
9278       prefix(REX_RB);
9279       src_enc -= 8;
9280     }
9281     dst_enc -= 8;
9282   }
9283   return dst_enc << 3 | src_enc;
9284 }
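
// Illustrative example (not from the original source): for dst_enc == 1 (rcx)
// and src_enc == 9 (r9), neither operand a byte register, the routine emits
// REX_B (0x41), reduces src_enc to 1, and returns (1 << 3) | 1 == 0x09; the
// caller typically ORs this into 0xC0 to form the ModRM byte 0xC9.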
9285 
9286 int8_t Assembler::get_prefixq(Address adr) {
9287   int8_t prfx = get_prefixq(adr, rax);
9288   assert(REX_W <= prfx && prfx <= REX_WXB, "must be");
9289   return prfx;
9290 }
9291 
9292 int8_t Assembler::get_prefixq(Address adr, Register src) {
9293   int8_t prfx = (int8_t)(REX_W +
9294                          ((int)adr.base_needs_rex()) +
9295                          ((int)adr.index_needs_rex() << 1) +
9296                          ((int)(src->encoding() >= 8) << 2));
9297 #ifdef ASSERT
9298   if (src->encoding() < 8) {
9299     if (adr.base_needs_rex()) {
9300       if (adr.index_needs_rex()) {
9301         assert(prfx == REX_WXB, "must be");
9302       } else {
9303         assert(prfx == REX_WB, "must be");
9304       }
9305     } else {
9306       if (adr.index_needs_rex()) {
9307         assert(prfx == REX_WX, "must be");
9308       } else {
9309         assert(prfx == REX_W, "must be");
9310       }
9311     }
9312   } else {
9313     if (adr.base_needs_rex()) {
9314       if (adr.index_needs_rex()) {
9315         assert(prfx == REX_WRXB, "must be");
9316       } else {
9317         assert(prfx == REX_WRB, "must be");
9318       }
9319     } else {
9320       if (adr.index_needs_rex()) {
9321         assert(prfx == REX_WRX, "must be");
9322       } else {
9323         assert(prfx == REX_WR, "must be");
9324       }
9325     }
9326   }
9327 #endif
9328   return prfx;
9329 }
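
// The arithmetic above is just REX_W (0x48) with the B, X and R bits ORed in.
// Illustrative example: a base register >= r8 adds 1 (REX.B), an index
// register >= r8 adds 2 (REX.X), and src >= r8 adds 4 (REX.R), so with all
// three set the result is 0x48 + 1 + 2 + 4 == 0x4F == REX_WRXB.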
9330 
9331 void Assembler::prefixq(Address adr) {
9332   emit_int8(get_prefixq(adr));
9333 }
9334 
9335 void Assembler::prefixq(Address adr, Register src) {
9336   emit_int8(get_prefixq(adr, src));
9337 }
9338 
9339 void Assembler::prefixq(Address adr, XMMRegister src) {
9340   if (src->encoding() < 8) {
9341     if (adr.base_needs_rex()) {
9342       if (adr.index_needs_rex()) {
9343         prefix(REX_WXB);
9344       } else {
9345         prefix(REX_WB);
9346       }
9347     } else {
9348       if (adr.index_needs_rex()) {
9349         prefix(REX_WX);
9350       } else {
9351         prefix(REX_W);
9352       }
9353     }
9354   } else {
9355     if (adr.base_needs_rex()) {
9356       if (adr.index_needs_rex()) {
9357         prefix(REX_WRXB);
9358       } else {
9359         prefix(REX_WRB);
9360       }
9361     } else {
9362       if (adr.index_needs_rex()) {
9363         prefix(REX_WRX);
9364       } else {
9365         prefix(REX_WR);
9366       }
9367     }
9368   }
9369 }
9370 
9371 int Assembler::prefixq_and_encode(int reg_enc) {
9372   if (reg_enc < 8) {
9373     prefix(REX_W);
9374   } else {
9375     prefix(REX_WB);
9376     reg_enc -= 8;
9377   }
9378   return reg_enc;
9379 }
9380 
9381 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
9382   if (dst_enc < 8) {
9383     if (src_enc < 8) {
9384       prefix(REX_W);
9385     } else {
9386       prefix(REX_WB);
9387       src_enc -= 8;
9388     }
9389   } else {
9390     if (src_enc < 8) {
9391       prefix(REX_WR);
9392     } else {
9393       prefix(REX_WRB);
9394       src_enc -= 8;
9395     }
9396     dst_enc -= 8;
9397   }
9398   return dst_enc << 3 | src_enc;
9399 }
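
// Illustrative example (not from the original source): prefixq_and_encode(10, 3),
// i.e. dst == r10 and src == rbx, emits REX_WR (0x4C), reduces dst_enc to 2,
// and returns (2 << 3) | 3 == 0x13, which becomes ModRM 0xD3 once the caller
// ORs in 0xC0.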
9400 
9401 void Assembler::adcq(Register dst, int32_t imm32) {
9402   (void) prefixq_and_encode(dst->encoding());
9403   emit_arith(0x81, 0xD0, dst, imm32);
9404 }
9405 
9406 void Assembler::adcq(Register dst, Address src) {
9407   InstructionMark im(this);
9408   emit_int16(get_prefixq(src, dst), 0x13);
9409   emit_operand(dst, src);
9410 }
9411 
9412 void Assembler::adcq(Register dst, Register src) {
9413   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9414   emit_arith(0x13, 0xC0, dst, src);
9415 }
9416 
9417 void Assembler::addq(Address dst, int32_t imm32) {
9418   InstructionMark im(this);
9419   prefixq(dst);
9420   emit_arith_operand(0x81, rax, dst, imm32);
9421 }
9422 
9423 void Assembler::addq(Address dst, Register src) {
9424   InstructionMark im(this);
9425   emit_int16(get_prefixq(dst, src), 0x01);
9426   emit_operand(src, dst);
9427 }
9428 
9429 void Assembler::addq(Register dst, int32_t imm32) {
9430   (void) prefixq_and_encode(dst->encoding());
9431   emit_arith(0x81, 0xC0, dst, imm32);
9432 }
9433 
9434 void Assembler::addq(Register dst, Address src) {
9435   InstructionMark im(this);
9436   emit_int16(get_prefixq(src, dst), 0x03);
9437   emit_operand(dst, src);
9438 }
9439 
9440 void Assembler::addq(Register dst, Register src) {
9441   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9442   emit_arith(0x03, 0xC0, dst, src);
9443 }
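
// Byte-level example of the reg-reg form above (illustrative; assumes the
// standard x86-64 encoding): addq(rax, rbx) emits REX.W via
// prefixq_and_encode(0, 3) and then opcode plus ModRM via emit_arith:
//
//   addq(rax, rbx);   // 0x48 0x03 0xC3  ==  add rax, rbx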
9444 
9445 void Assembler::adcxq(Register dst, Register src) {
9446   //assert(VM_Version::supports_adx(), "adx instructions not supported");
9447   emit_int8(0x66);
9448   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9449   emit_int32(0x0F,
9450              0x38,
9451              (unsigned char)0xF6,
9452              (0xC0 | encode));
9453 }
9454 
9455 void Assembler::adoxq(Register dst, Register src) {
9456   //assert(VM_Version::supports_adx(), "adx instructions not supported");
9457   emit_int8((unsigned char)0xF3);
9458   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9459   emit_int32(0x0F,
9460              0x38,
9461              (unsigned char)0xF6,
9462              (0xC0 | encode));
9463 }
9464 
9465 void Assembler::andq(Address dst, int32_t imm32) {
9466   InstructionMark im(this);
9467   emit_int16(get_prefixq(dst), (unsigned char)0x81);
9468   emit_operand(rsp, dst, 4);
9469   emit_int32(imm32);
9470 }
9471 
9472 void Assembler::andq(Register dst, int32_t imm32) {
9473   (void) prefixq_and_encode(dst->encoding());
9474   emit_arith(0x81, 0xE0, dst, imm32);
9475 }
9476 
9477 void Assembler::andq(Register dst, Address src) {
9478   InstructionMark im(this);
9479   emit_int16(get_prefixq(src, dst), 0x23);
9480   emit_operand(dst, src);
9481 }
9482 
9483 void Assembler::andq(Register dst, Register src) {
9484   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9485   emit_arith(0x23, 0xC0, dst, src);
9486 }
9487 
9488 void Assembler::andnq(Register dst, Register src1, Register src2) {
9489   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9490   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9491   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9492   emit_int16((unsigned char)0xF2, (0xC0 | encode));
9493 }
9494 
9495 void Assembler::andnq(Register dst, Register src1, Address src2) {
9496   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9497   InstructionMark im(this);
9498   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9499   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9500   emit_int8((unsigned char)0xF2);
9501   emit_operand(dst, src2);
9502 }
9503 
9504 void Assembler::bsfq(Register dst, Register src) {
9505   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9506   emit_int24(0x0F, (unsigned char)0xBC, (0xC0 | encode));
9507 }
9508 
9509 void Assembler::bsrq(Register dst, Register src) {
9510   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9511   emit_int24(0x0F, (unsigned char)0xBD, (0xC0 | encode));
9512 }
9513 
9514 void Assembler::bswapq(Register reg) {
9515   int encode = prefixq_and_encode(reg->encoding());
9516   emit_int16(0x0F, (0xC8 | encode));
9517 }
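
// Illustrative encoding (standard x86-64): bswapq(rcx) emits REX.W followed by
// 0x0F and 0xC8 + reg:
//
//   bswapq(rcx);   // 0x48 0x0F 0xC9  ==  bswap rcx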
9518 
9519 void Assembler::blsiq(Register dst, Register src) {
9520   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9521   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9522   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9523   emit_int16((unsigned char)0xF3, (0xC0 | encode));
9524 }
9525 
9526 void Assembler::blsiq(Register dst, Address src) {
9527   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9528   InstructionMark im(this);
9529   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9530   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9531   emit_int8((unsigned char)0xF3);
9532   emit_operand(rbx, src);
9533 }
9534 
9535 void Assembler::blsmskq(Register dst, Register src) {
9536   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9537   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9538   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9539   emit_int16((unsigned char)0xF3, (0xC0 | encode));
9540 }
9541 
9542 void Assembler::blsmskq(Register dst, Address src) {
9543   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9544   InstructionMark im(this);
9545   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9546   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9547   emit_int8((unsigned char)0xF3);
9548   emit_operand(rdx, src);
9549 }
9550 
9551 void Assembler::blsrq(Register dst, Register src) {
9552   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9553   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9554   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9555   emit_int16((unsigned char)0xF3, (0xC0 | encode));
9556 }
9557 
9558 void Assembler::blsrq(Register dst, Address src) {
9559   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9560   InstructionMark im(this);
9561   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9562   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9563   emit_int8((unsigned char)0xF3);
9564   emit_operand(rcx, src);
9565 }
9566 
9567 void Assembler::cdqq() {
9568   emit_int16(REX_W, (unsigned char)0x99);
9569 }
9570 
9571 void Assembler::clflush(Address adr) {
9572   assert(VM_Version::supports_clflush(), "should do");
9573   prefix(adr);
9574   emit_int16(0x0F, (unsigned char)0xAE);
9575   emit_operand(rdi, adr);
9576 }
9577 
9578 void Assembler::clflushopt(Address adr) {
9579   assert(VM_Version::supports_clflushopt(), "should do!");
9580   // adr should be base reg only with no index or offset
9581   assert(adr.index() == noreg, "index should be noreg");
9582   assert(adr.scale() == Address::no_scale, "scale should be no_scale");
9583   assert(adr.disp() == 0, "displacement should be 0");
9584   // instruction prefix is 0x66
9585   emit_int8(0x66);
9586   prefix(adr);
9587   // opcode family is 0x0F 0xAE
9588   emit_int16(0x0F, (unsigned char)0xAE);
9589   // extended opcode byte is 7 == rdi
9590   emit_operand(rdi, adr);
9591 }
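
// Illustrative encoding (standard x86-64; the Address value is hypothetical):
// with a low base register no REX prefix is needed, so for a base of rbx the
// bytes are the 0x66 prefix, the 0x0F 0xAE opcode family, and ModRM with
// reg == 7:
//
//   clflushopt(Address(rbx, 0));   // 0x66 0x0F 0xAE 0x3B  ==  clflushopt [rbx]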
9592 
9593 void Assembler::clwb(Address adr) {
9594   assert(VM_Version::supports_clwb(), "should do!");
9595   // adr should be base reg only with no index or offset
9596   assert(adr.index() == noreg, "index should be noreg");
9597   assert(adr.scale() == Address::no_scale, "scale should be no_scale");
9598   assert(adr.disp() == 0, "displacement should be 0");
9599   // instruction prefix is 0x66
9600   emit_int8(0x66);
9601   prefix(adr);
9602   // opcode family is 0x0f 0xAE
9603   emit_int16(0x0F, (unsigned char)0xAE);
9604   // extended opcode byte is 6 == rsi
9605   emit_operand(rsi, adr);
9606 }
9607 
9608 void Assembler::cmovq(Condition cc, Register dst, Register src) {
9609   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9610   emit_int24(0x0F, (0x40 | cc), (0xC0 | encode));
9611 }
9612 
9613 void Assembler::cmovq(Condition cc, Register dst, Address src) {
9614   InstructionMark im(this);
9615   emit_int24(get_prefixq(src, dst), 0x0F, (0x40 | cc));
9616   emit_operand(dst, src);
9617 }
9618 
9619 void Assembler::cmpq(Address dst, int32_t imm32) {
9620   InstructionMark im(this);
9621   emit_int16(get_prefixq(dst), (unsigned char)0x81);
9622   emit_operand(rdi, dst, 4);
9623   emit_int32(imm32);
9624 }
9625 
9626 void Assembler::cmpq(Register dst, int32_t imm32) {
9627   (void) prefixq_and_encode(dst->encoding());
9628   emit_arith(0x81, 0xF8, dst, imm32);
9629 }
9630 
9631 void Assembler::cmpq(Address dst, Register src) {
9632   InstructionMark im(this);
9633   emit_int16(get_prefixq(dst, src), 0x3B);
9634   emit_operand(src, dst);
9635 }
9636 
9637 void Assembler::cmpq(Register dst, Register src) {
9638   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9639   emit_arith(0x3B, 0xC0, dst, src);
9640 }
9641 
9642 void Assembler::cmpq(Register dst, Address src) {
9643   InstructionMark im(this);
9644   emit_int16(get_prefixq(src, dst), 0x3B);
9645   emit_operand(dst, src);
9646 }
9647 
9648 void Assembler::cmpxchgq(Register reg, Address adr) {
9649   InstructionMark im(this);
9650   emit_int24(get_prefixq(adr, reg), 0x0F, (unsigned char)0xB1);
9651   emit_operand(reg, adr);
9652 }
9653 
9654 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
9655   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9656   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9657   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
9658   emit_int16(0x2A, (0xC0 | encode));
9659 }
9660 
9661 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
9662   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9663   InstructionMark im(this);
9664   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9665   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
9666   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
9667   emit_int8(0x2A);
9668   emit_operand(dst, src);
9669 }
9670 
9671 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
9672   NOT_LP64(assert(VM_Version::supports_sse(), ""));
9673   InstructionMark im(this);
9674   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9675   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
9676   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
9677   emit_int8(0x2A);
9678   emit_operand(dst, src);
9679 }
9680 
9681 void Assembler::cvttsd2siq(Register dst, Address src) {
9682   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9683   // F2 REX.W 0F 2C /r
9684   // CVTTSD2SI r64, xmm1/m64
9685   InstructionMark im(this);
9686   emit_int32((unsigned char)0xF2, REX_W, 0x0F, 0x2C);
9687   emit_operand(dst, src);
9688 }
9689 
9690 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
9691   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9692   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9693   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
9694   emit_int16(0x2C, (0xC0 | encode));
9695 }
9696 
9697 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
9698   NOT_LP64(assert(VM_Version::supports_sse(), ""));
9699   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9700   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
9701   emit_int16(0x2C, (0xC0 | encode));
9702 }
9703 
9704 void Assembler::decl(Register dst) {
9705   // Don't use it directly. Use MacroAssembler::decrementl() instead.
9706   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
9707   int encode = prefix_and_encode(dst->encoding());
9708   emit_int16((unsigned char)0xFF, (0xC8 | encode));
9709 }
9710 
9711 void Assembler::decq(Register dst) {
9712   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_int16((unsigned char)0xFF, (0xC8 | encode));
9716 }
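
// Illustrative encoding (standard x86-64): decq(rbx) emits REX.W, then 0xFF /1:
//
//   decq(rbx);   // 0x48 0xFF 0xCB  ==  dec rbx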
9717 
9718 void Assembler::decq(Address dst) {
9719   // Don't use it directly. Use MacroAssembler::decrementq() instead.
9720   InstructionMark im(this);
9721   emit_int16(get_prefixq(dst), (unsigned char)0xFF);
9722   emit_operand(rcx, dst);
9723 }
9724 
9725 void Assembler::fxrstor(Address src) {
9726   emit_int24(get_prefixq(src), 0x0F, (unsigned char)0xAE);
9727   emit_operand(as_Register(1), src);
9728 }
9729 
9730 void Assembler::xrstor(Address src) {
9731   emit_int24(get_prefixq(src), 0x0F, (unsigned char)0xAE);
9732   emit_operand(as_Register(5), src);
9733 }
9734 
9735 void Assembler::fxsave(Address dst) {
9736   emit_int24(get_prefixq(dst), 0x0F, (unsigned char)0xAE);
9737   emit_operand(as_Register(0), dst);
9738 }
9739 
9740 void Assembler::xsave(Address dst) {
9741   emit_int24(get_prefixq(dst), 0x0F, (unsigned char)0xAE);
9742   emit_operand(as_Register(4), dst);
9743 }
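
// The as_Register(N) arguments above supply the /digit opcode extension in
// ModRM.reg for the 0x0F 0xAE group (standard x86-64 encoding): /0 == fxsave,
// /1 == fxrstor, /4 == xsave, /5 == xrstor.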
9744 
9745 void Assembler::idivq(Register src) {
9746   int encode = prefixq_and_encode(src->encoding());
9747   emit_int16((unsigned char)0xF7, (0xF8 | encode));
9748 }
9749 
9750 void Assembler::imulq(Register dst, Register src) {
9751   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9752   emit_int24(0x0F, (unsigned char)0xAF, (0xC0 | encode));
9753 }
9754 
9755 void Assembler::imulq(Register dst, Register src, int value) {
9756   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9757   if (is8bit(value)) {
9758     emit_int24(0x6B, (0xC0 | encode), (value & 0xFF));
9759   } else {
9760     emit_int16(0x69, (0xC0 | encode));
9761     emit_int32(value);
9762   }
9763 }
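
// Illustrative encodings (standard x86-64) for the two immediate widths above:
//
//   imulq(rax, rbx, 16);    // 0x48 0x6B 0xC3 0x10                  (imm8 form, 0x6B)
//   imulq(rax, rbx, 4096);  // 0x48 0x69 0xC3 0x00 0x10 0x00 0x00   (imm32 form, 0x69)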
9764 
9765 void Assembler::imulq(Register dst, Address src) {
9766   InstructionMark im(this);
9767   emit_int24(get_prefixq(src, dst), 0x0F, (unsigned char)0xAF);
9768   emit_operand(dst, src);
9769 }
9770 
9771 void Assembler::incl(Register dst) {
9772   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
9774   int encode = prefix_and_encode(dst->encoding());
9775   emit_int16((unsigned char)0xFF, (0xC0 | encode));
9776 }
9777 
9778 void Assembler::incq(Register dst) {
9779   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
9781   int encode = prefixq_and_encode(dst->encoding());
9782   emit_int16((unsigned char)0xFF, (0xC0 | encode));
9783 }
9784 
9785 void Assembler::incq(Address dst) {
9786   // Don't use it directly. Use MacroAssembler::incrementq() instead.
9787   InstructionMark im(this);
9788   emit_int16(get_prefixq(dst), (unsigned char)0xFF);
9789   emit_operand(rax, dst);
9790 }
9791 
9792 void Assembler::lea(Register dst, Address src) {
9793   leaq(dst, src);
9794 }
9795 
9796 void Assembler::leaq(Register dst, Address src) {
9797   InstructionMark im(this);
9798   emit_int16(get_prefixq(src, dst), (unsigned char)0x8D);
9799   emit_operand(dst, src);
9800 }
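
// Illustrative encoding (standard x86-64; the Address value is hypothetical):
// an rsp base forces a SIB byte, so leaq(rax, Address(rsp, 8)) is REX.W, 0x8D,
// ModRM 0x44, SIB 0x24, disp8 0x08:
//
//   leaq(rax, Address(rsp, 8));   // 0x48 0x8D 0x44 0x24 0x08  ==  lea rax, [rsp + 8]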
9801 
9802 void Assembler::mov64(Register dst, int64_t imm64) {
9803   InstructionMark im(this);
9804   int encode = prefixq_and_encode(dst->encoding());
9805   emit_int8(0xB8 | encode);
9806   emit_int64(imm64);
9807 }
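
// Illustrative encoding (standard x86-64): mov64 uses the 0xB8 + reg form with
// a full 8-byte little-endian immediate:
//
//   mov64(rax, 0x1122334455667788);
//   // 0x48 0xB8 0x88 0x77 0x66 0x55 0x44 0x33 0x22 0x11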
9808 
9809 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
9810   InstructionMark im(this);
9811   int encode = prefixq_and_encode(dst->encoding());
9812   emit_int8(0xB8 | encode);
9813   emit_data64(imm64, rspec);
9814 }
9815 
9816 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
9817   InstructionMark im(this);
9818   int encode = prefix_and_encode(dst->encoding());
9819   emit_int8(0xB8 | encode);
9820   emit_data((int)imm32, rspec, narrow_oop_operand);
9821 }
9822 
9823 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
9824   InstructionMark im(this);
9825   prefix(dst);
9826   emit_int8((unsigned char)0xC7);
9827   emit_operand(rax, dst, 4);
9828   emit_data((int)imm32, rspec, narrow_oop_operand);
9829 }
9830 
9831 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
9832   InstructionMark im(this);
9833   int encode = prefix_and_encode(src1->encoding());
9834   emit_int16((unsigned char)0x81, (0xF8 | encode));
9835   emit_data((int)imm32, rspec, narrow_oop_operand);
9836 }
9837 
9838 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
9839   InstructionMark im(this);
9840   prefix(src1);
9841   emit_int8((unsigned char)0x81);
9842   emit_operand(rax, src1, 4);
9843   emit_data((int)imm32, rspec, narrow_oop_operand);
9844 }
9845 
9846 void Assembler::lzcntq(Register dst, Register src) {
9847   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
9848   emit_int8((unsigned char)0xF3);
9849   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9850   emit_int24(0x0F, (unsigned char)0xBD, (0xC0 | encode));
9851 }
9852 
9853 void Assembler::movdq(XMMRegister dst, Register src) {
9854   // table D-1 says MMX/SSE2
9855   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9856   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9857   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
9858   emit_int16(0x6E, (0xC0 | encode));
9859 }
9860 
9861 void Assembler::movdq(Register dst, XMMRegister src) {
9862   // table D-1 says MMX/SSE2
9863   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9864   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9865   // swap src/dst to get correct prefix
9866   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
9867   emit_int16(0x7E,
9868              (0xC0 | encode));
9869 }
9870 
9871 void Assembler::movq(Register dst, Register src) {
9872   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9873   emit_int16((unsigned char)0x8B,
9874              (0xC0 | encode));
9875 }
9876 
9877 void Assembler::movq(Register dst, Address src) {
9878   InstructionMark im(this);
9879   emit_int16(get_prefixq(src, dst), (unsigned char)0x8B);
9880   emit_operand(dst, src);
9881 }
9882 
9883 void Assembler::movq(Address dst, Register src) {
9884   InstructionMark im(this);
9885   emit_int16(get_prefixq(dst, src), (unsigned char)0x89);
9886   emit_operand(src, dst);
9887 }
9888 
9889 void Assembler::movsbq(Register dst, Address src) {
9890   InstructionMark im(this);
9891   emit_int24(get_prefixq(src, dst),
9892              0x0F,
9893              (unsigned char)0xBE);
9894   emit_operand(dst, src);
9895 }
9896 
9897 void Assembler::movsbq(Register dst, Register src) {
9898   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9899   emit_int24(0x0F, (unsigned char)0xBE, (0xC0 | encode));
9900 }
9901 
9902 void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3) as movl     $0x0000000048000000,(%rbx),
  // so we shouldn't use this encoding until it has been tested at runtime...
9906   ShouldNotReachHere();
9907   InstructionMark im(this);
9908   int encode = prefixq_and_encode(dst->encoding());
9909   emit_int8(0xC7 | encode);
9910   emit_int32(imm32);
9911 }
9912 
9913 void Assembler::movslq(Address dst, int32_t imm32) {
9914   assert(is_simm32(imm32), "lost bits");
9915   InstructionMark im(this);
9916   emit_int16(get_prefixq(dst), (unsigned char)0xC7);
9917   emit_operand(rax, dst, 4);
9918   emit_int32(imm32);
9919 }
9920 
9921 void Assembler::movslq(Register dst, Address src) {
9922   InstructionMark im(this);
9923   emit_int16(get_prefixq(src, dst), 0x63);
9924   emit_operand(dst, src);
9925 }
9926 
9927 void Assembler::movslq(Register dst, Register src) {
9928   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9929   emit_int16(0x63, (0xC0 | encode));
9930 }
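
// Illustrative encoding (standard x86-64): the reg-reg form above is MOVSXD:
//
//   movslq(rax, rbx);   // 0x48 0x63 0xC3  ==  movsxd rax, ebx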
9931 
9932 void Assembler::movswq(Register dst, Address src) {
9933   InstructionMark im(this);
9934   emit_int24(get_prefixq(src, dst),
9935              0x0F,
9936              (unsigned char)0xBF);
9937   emit_operand(dst, src);
9938 }
9939 
9940 void Assembler::movswq(Register dst, Register src) {
9941   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9942   emit_int24(0x0F, (unsigned char)0xBF, (0xC0 | encode));
9943 }
9944 
9945 void Assembler::movzbq(Register dst, Address src) {
9946   InstructionMark im(this);
9947   emit_int24(get_prefixq(src, dst),
9948              0x0F,
9949              (unsigned char)0xB6);
9950   emit_operand(dst, src);
9951 }
9952 
9953 void Assembler::movzbq(Register dst, Register src) {
9954   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9955   emit_int24(0x0F, (unsigned char)0xB6, (0xC0 | encode));
9956 }
9957 
9958 void Assembler::movzwq(Register dst, Address src) {
9959   InstructionMark im(this);
9960   emit_int24(get_prefixq(src, dst),
9961              0x0F,
9962              (unsigned char)0xB7);
9963   emit_operand(dst, src);
9964 }
9965 
9966 void Assembler::movzwq(Register dst, Register src) {
9967   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9968   emit_int24(0x0F, (unsigned char)0xB7, (0xC0 | encode));
9969 }
9970 
9971 void Assembler::mulq(Address src) {
9972   InstructionMark im(this);
9973   emit_int16(get_prefixq(src), (unsigned char)0xF7);
9974   emit_operand(rsp, src);
9975 }
9976 
9977 void Assembler::mulq(Register src) {
9978   int encode = prefixq_and_encode(src->encoding());
9979   emit_int16((unsigned char)0xF7, (0xE0 | encode));
9980 }
9981 
9982 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
9983   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
9984   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9985   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
9986   emit_int16((unsigned char)0xF6, (0xC0 | encode));
9987 }
9988 
9989 void Assembler::negq(Register dst) {
9990   int encode = prefixq_and_encode(dst->encoding());
9991   emit_int16((unsigned char)0xF7, (0xD8 | encode));
9992 }
9993 
9994 void Assembler::notq(Register dst) {
9995   int encode = prefixq_and_encode(dst->encoding());
9996   emit_int16((unsigned char)0xF7, (0xD0 | encode));
9997 }
9998 
9999 void Assembler::btsq(Address dst, int imm8) {
10000   assert(isByte(imm8), "not a byte");
10001   InstructionMark im(this);
10002   emit_int24(get_prefixq(dst),
10003              0x0F,
10004              (unsigned char)0xBA);
10005   emit_operand(rbp /* 5 */, dst, 1);
10006   emit_int8(imm8);
10007 }
10008 
10009 void Assembler::btrq(Address dst, int imm8) {
10010   assert(isByte(imm8), "not a byte");
10011   InstructionMark im(this);
10012   emit_int24(get_prefixq(dst),
10013              0x0F,
10014              (unsigned char)0xBA);
10015   emit_operand(rsi /* 6 */, dst, 1);
10016   emit_int8(imm8);
10017 }
10018 
10019 void Assembler::orq(Address dst, int32_t imm32) {
10020   InstructionMark im(this);
10021   emit_int16(get_prefixq(dst), (unsigned char)0x81);
10022   emit_operand(rcx, dst, 4);
10023   emit_int32(imm32);
10024 }
10025 
10026 void Assembler::orq(Register dst, int32_t imm32) {
10027   (void) prefixq_and_encode(dst->encoding());
10028   emit_arith(0x81, 0xC8, dst, imm32);
10029 }
10030 
10031 void Assembler::orq(Register dst, Address src) {
10032   InstructionMark im(this);
10033   emit_int16(get_prefixq(src, dst), 0x0B);
10034   emit_operand(dst, src);
10035 }
10036 
10037 void Assembler::orq(Register dst, Register src) {
10038   (void) prefixq_and_encode(dst->encoding(), src->encoding());
10039   emit_arith(0x0B, 0xC0, dst, src);
10040 }
10041 
10042 void Assembler::popcntq(Register dst, Address src) {
10043   assert(VM_Version::supports_popcnt(), "must support");
10044   InstructionMark im(this);
10045   emit_int32((unsigned char)0xF3,
10046              get_prefixq(src, dst),
10047              0x0F,
10048              (unsigned char)0xB8);
10049   emit_operand(dst, src);
10050 }
10051 
10052 void Assembler::popcntq(Register dst, Register src) {
10053   assert(VM_Version::supports_popcnt(), "must support");
10054   emit_int8((unsigned char)0xF3);
10055   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
10056   emit_int24(0x0F, (unsigned char)0xB8, (0xC0 | encode));
10057 }
10058 
10059 void Assembler::popq(Address dst) {
10060   InstructionMark im(this);
10061   emit_int16(get_prefixq(dst), (unsigned char)0x8F);
10062   emit_operand(rax, dst);
10063 }
10064 
10065 // Precomputable: popa, pusha, vzeroupper
10066 
// The results of these routines are invariant from one invocation to another
// for the duration of a run. Caching the result on bootstrap and copying it
// out on subsequent invocations can thus be beneficial.
10070 static bool     precomputed = false;
10071 
10072 static u_char* popa_code  = NULL;
10073 static int     popa_len   = 0;
10074 
10075 static u_char* pusha_code = NULL;
10076 static int     pusha_len  = 0;
10077 
10078 static u_char* vzup_code  = NULL;
10079 static int     vzup_len   = 0;
10080 
10081 void Assembler::precompute_instructions() {
10082   assert(!Universe::is_fully_initialized(), "must still be single threaded");
10083   guarantee(!precomputed, "only once");
10084   precomputed = true;
10085   ResourceMark rm;
10086 
10087   // Make a temporary buffer big enough for the routines we're capturing
10088   int size = 256;
10089   char* tmp_code = NEW_RESOURCE_ARRAY(char, size);
10090   CodeBuffer buffer((address)tmp_code, size);
10091   MacroAssembler masm(&buffer);
10092 
10093   address begin_popa  = masm.code_section()->end();
10094   masm.popa_uncached();
10095   address end_popa    = masm.code_section()->end();
10096   masm.pusha_uncached();
10097   address end_pusha   = masm.code_section()->end();
10098   masm.vzeroupper_uncached();
10099   address end_vzup    = masm.code_section()->end();
10100 
10101   // Save the instructions to permanent buffers.
10102   popa_len = (int)(end_popa - begin_popa);
10103   popa_code = NEW_C_HEAP_ARRAY(u_char, popa_len, mtInternal);
10104   memcpy(popa_code, begin_popa, popa_len);
10105 
10106   pusha_len = (int)(end_pusha - end_popa);
10107   pusha_code = NEW_C_HEAP_ARRAY(u_char, pusha_len, mtInternal);
10108   memcpy(pusha_code, end_popa, pusha_len);
10109 
10110   vzup_len = (int)(end_vzup - end_pusha);
10111   if (vzup_len > 0) {
10112     vzup_code = NEW_C_HEAP_ARRAY(u_char, vzup_len, mtInternal);
10113     memcpy(vzup_code, end_pusha, vzup_len);
10114   } else {
10115     vzup_code = pusha_code; // dummy
10116   }
10117 
10118   assert(masm.code()->total_oop_size() == 0 &&
10119          masm.code()->total_metadata_size() == 0 &&
10120          masm.code()->total_relocation_size() == 0,
10121          "pre-computed code can't reference oops, metadata or contain relocations");
10122 }
10123 
10124 static void emit_copy(CodeSection* code_section, u_char* src, int src_len) {
10125   assert(src != NULL, "code to copy must have been pre-computed");
10126   assert(code_section->limit() - code_section->end() > src_len, "code buffer not large enough");
10127   address end = code_section->end();
10128   memcpy(end, src, src_len);
10129   code_section->set_end(end + src_len);
10130 }
10131 
10132 void Assembler::popa() { // 64bit
10133   emit_copy(code_section(), popa_code, popa_len);
10134 }
10135 
10136 void Assembler::popa_uncached() { // 64bit
10137   movq(r15, Address(rsp, 0));
10138   movq(r14, Address(rsp, wordSize));
10139   movq(r13, Address(rsp, 2 * wordSize));
10140   movq(r12, Address(rsp, 3 * wordSize));
10141   movq(r11, Address(rsp, 4 * wordSize));
10142   movq(r10, Address(rsp, 5 * wordSize));
10143   movq(r9,  Address(rsp, 6 * wordSize));
10144   movq(r8,  Address(rsp, 7 * wordSize));
10145   movq(rdi, Address(rsp, 8 * wordSize));
10146   movq(rsi, Address(rsp, 9 * wordSize));
10147   movq(rbp, Address(rsp, 10 * wordSize));
10148   // Skip rsp as it is restored automatically to the value
10149   // before the corresponding pusha when popa is done.
10150   movq(rbx, Address(rsp, 12 * wordSize));
10151   movq(rdx, Address(rsp, 13 * wordSize));
10152   movq(rcx, Address(rsp, 14 * wordSize));
10153   movq(rax, Address(rsp, 15 * wordSize));
10154 
10155   addq(rsp, 16 * wordSize);
10156 }
10157 
10158 // Does not actually store the value of rsp on the stack.
10159 // The slot for rsp just contains an arbitrary value.
10160 void Assembler::pusha() { // 64bit
10161   emit_copy(code_section(), pusha_code, pusha_len);
10162 }
10163 
10164 // Does not actually store the value of rsp on the stack.
10165 // The slot for rsp just contains an arbitrary value.
10166 void Assembler::pusha_uncached() { // 64bit
10167   subq(rsp, 16 * wordSize);
10168 
10169   movq(Address(rsp, 15 * wordSize), rax);
10170   movq(Address(rsp, 14 * wordSize), rcx);
10171   movq(Address(rsp, 13 * wordSize), rdx);
10172   movq(Address(rsp, 12 * wordSize), rbx);
10173   // Skip rsp as the value is normally not used. There are a few places where
10174   // the original value of rsp needs to be known but that can be computed
10175   // from the value of rsp immediately after pusha (rsp + 16 * wordSize).
10176   movq(Address(rsp, 10 * wordSize), rbp);
10177   movq(Address(rsp, 9 * wordSize), rsi);
10178   movq(Address(rsp, 8 * wordSize), rdi);
10179   movq(Address(rsp, 7 * wordSize), r8);
10180   movq(Address(rsp, 6 * wordSize), r9);
10181   movq(Address(rsp, 5 * wordSize), r10);
10182   movq(Address(rsp, 4 * wordSize), r11);
10183   movq(Address(rsp, 3 * wordSize), r12);
10184   movq(Address(rsp, 2 * wordSize), r13);
10185   movq(Address(rsp, wordSize), r14);
10186   movq(Address(rsp, 0), r15);
10187 }
10188 
10189 void Assembler::vzeroupper() {
10190   emit_copy(code_section(), vzup_code, vzup_len);
10191 }
10192 
10193 void Assembler::pushq(Address src) {
10194   InstructionMark im(this);
10195   emit_int16(get_prefixq(src), (unsigned char)0xFF);
10196   emit_operand(rsi, src);
10197 }
10198 
10199 void Assembler::rclq(Register dst, int imm8) {
10200   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10201   int encode = prefixq_and_encode(dst->encoding());
10202   if (imm8 == 1) {
10203     emit_int16((unsigned char)0xD1, (0xD0 | encode));
10204   } else {
10205     emit_int24((unsigned char)0xC1, (0xD0 | encode), imm8);
10206   }
10207 }
10208 
10209 void Assembler::rcrq(Register dst, int imm8) {
10210   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10211   int encode = prefixq_and_encode(dst->encoding());
10212   if (imm8 == 1) {
10213     emit_int16((unsigned char)0xD1, (0xD8 | encode));
10214   } else {
10215     emit_int24((unsigned char)0xC1, (0xD8 | encode), imm8);
10216   }
10217 }
10218 
10219 void Assembler::rorq(Register dst, int imm8) {
10220   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10221   int encode = prefixq_and_encode(dst->encoding());
10222   if (imm8 == 1) {
10223     emit_int16((unsigned char)0xD1, (0xC8 | encode));
10224   } else {
    emit_int24((unsigned char)0xC1, (0xC8 | encode), imm8);
10226   }
10227 }
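
// Illustrative encodings (standard x86-64) for the two branches above:
//
//   rorq(rax, 1);   // 0x48 0xD1 0xC8        (0xD1 short form for a count of 1)
//   rorq(rax, 5);   // 0x48 0xC1 0xC8 0x05   (0xC1 form with an imm8 count)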
10228 
10229 void Assembler::rorxq(Register dst, Register src, int imm8) {
10230   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
10231   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
10232   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
10233   emit_int24((unsigned char)0xF0, (0xC0 | encode), imm8);
10234 }
10235 
10236 void Assembler::rorxd(Register dst, Register src, int imm8) {
10237   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
10238   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
10239   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
10240   emit_int24((unsigned char)0xF0, (0xC0 | encode), imm8);
10241 }
10242 
10243 void Assembler::sarq(Register dst, int imm8) {
10244   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10245   int encode = prefixq_and_encode(dst->encoding());
10246   if (imm8 == 1) {
10247     emit_int16((unsigned char)0xD1, (0xF8 | encode));
10248   } else {
10249     emit_int24((unsigned char)0xC1, (0xF8 | encode), imm8);
10250   }
10251 }
10252 
10253 void Assembler::sarq(Register dst) {
10254   int encode = prefixq_and_encode(dst->encoding());
10255   emit_int16((unsigned char)0xD3, (0xF8 | encode));
10256 }
10257 
10258 void Assembler::sbbq(Address dst, int32_t imm32) {
10259   InstructionMark im(this);
10260   prefixq(dst);
10261   emit_arith_operand(0x81, rbx, dst, imm32);
10262 }
10263 
10264 void Assembler::sbbq(Register dst, int32_t imm32) {
10265   (void) prefixq_and_encode(dst->encoding());
10266   emit_arith(0x81, 0xD8, dst, imm32);
10267 }
10268 
10269 void Assembler::sbbq(Register dst, Address src) {
10270   InstructionMark im(this);
10271   emit_int16(get_prefixq(src, dst), 0x1B);
10272   emit_operand(dst, src);
10273 }
10274 
10275 void Assembler::sbbq(Register dst, Register src) {
10276   (void) prefixq_and_encode(dst->encoding(), src->encoding());
10277   emit_arith(0x1B, 0xC0, dst, src);
10278 }
10279 
10280 void Assembler::shlq(Register dst, int imm8) {
10281   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10282   int encode = prefixq_and_encode(dst->encoding());
10283   if (imm8 == 1) {
10284     emit_int16((unsigned char)0xD1, (0xE0 | encode));
10285   } else {
10286     emit_int24((unsigned char)0xC1, (0xE0 | encode), imm8);
10287   }
10288 }
10289 
10290 void Assembler::shlq(Register dst) {
10291   int encode = prefixq_and_encode(dst->encoding());
10292   emit_int16((unsigned char)0xD3, (0xE0 | encode));
10293 }
10294 
10295 void Assembler::shrq(Register dst, int imm8) {
10296   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10297   int encode = prefixq_and_encode(dst->encoding());
10298   emit_int24((unsigned char)0xC1, (0xE8 | encode), imm8);
10299 }
10300 
10301 void Assembler::shrq(Register dst) {
10302   int encode = prefixq_and_encode(dst->encoding());
  emit_int16((unsigned char)0xD3, (0xE8 | encode));
10304 }
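
// Illustrative encodings (standard x86-64) for the shift forms above:
//
//   shrq(rdx, 5);   // 0x48 0xC1 0xEA 0x05  ==  shr rdx, 5
//   shrq(rdx);      // 0x48 0xD3 0xEA       ==  shr rdx, cl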
10305 
10306 void Assembler::subq(Address dst, int32_t imm32) {
10307   InstructionMark im(this);
10308   prefixq(dst);
10309   emit_arith_operand(0x81, rbp, dst, imm32);
10310 }
10311 
10312 void Assembler::subq(Address dst, Register src) {
10313   InstructionMark im(this);
10314   emit_int16(get_prefixq(dst, src), 0x29);
10315   emit_operand(src, dst);
10316 }
10317 
10318 void Assembler::subq(Register dst, int32_t imm32) {
10319   (void) prefixq_and_encode(dst->encoding());
10320   emit_arith(0x81, 0xE8, dst, imm32);
10321 }
10322 
// Force generation of a 4-byte immediate value even if it fits into 8 bits
10324 void Assembler::subq_imm32(Register dst, int32_t imm32) {
10325   (void) prefixq_and_encode(dst->encoding());
10326   emit_arith_imm32(0x81, 0xE8, dst, imm32);
10327 }
10328 
10329 void Assembler::subq(Register dst, Address src) {
10330   InstructionMark im(this);
10331   emit_int16(get_prefixq(src, dst), 0x2B);
10332   emit_operand(dst, src);
10333 }
10334 
10335 void Assembler::subq(Register dst, Register src) {
10336   (void) prefixq_and_encode(dst->encoding(), src->encoding());
10337   emit_arith(0x2B, 0xC0, dst, src);
10338 }
10339 
10340 void Assembler::testq(Register dst, int32_t imm32) {
10341   // not using emit_arith because test
10342   // doesn't support sign-extension of
10343   // 8bit operands
10344   int encode = dst->encoding();
10345   if (encode == 0) {
10346     emit_int16(REX_W, (unsigned char)0xA9);
10347   } else {
10348     encode = prefixq_and_encode(encode);
10349     emit_int16((unsigned char)0xF7, (0xC0 | encode));
10350   }
10351   emit_int32(imm32);
10352 }
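
// Illustrative encodings (standard x86-64) showing the rax special case above:
//
//   testq(rax, 0x100);   // 0x48 0xA9 0x00 0x01 0x00 0x00        (short rax form)
//   testq(rcx, 0x100);   // 0x48 0xF7 0xC1 0x00 0x01 0x00 0x00   (0xF7 /0 form)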
10353 
10354 void Assembler::testq(Register dst, Register src) {
10355   (void) prefixq_and_encode(dst->encoding(), src->encoding());
10356   emit_arith(0x85, 0xC0, dst, src);
10357 }
10358 
10359 void Assembler::testq(Register dst, Address src) {
10360   InstructionMark im(this);
10361   emit_int16(get_prefixq(src, dst), (unsigned char)0x85);
10362   emit_operand(dst, src);
10363 }
10364 
10365 void Assembler::xaddq(Address dst, Register src) {
10366   InstructionMark im(this);
10367   emit_int24(get_prefixq(dst, src), 0x0F, (unsigned char)0xC1);
10368   emit_operand(src, dst);
10369 }
10370 
10371 void Assembler::xchgq(Register dst, Address src) {
10372   InstructionMark im(this);
10373   emit_int16(get_prefixq(src, dst), (unsigned char)0x87);
10374   emit_operand(dst, src);
10375 }
10376 
10377 void Assembler::xchgq(Register dst, Register src) {
10378   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int16((unsigned char)0x87, (0xC0 | encode));
10380 }
10381 
10382 void Assembler::xorq(Register dst, Register src) {
10383   (void) prefixq_and_encode(dst->encoding(), src->encoding());
10384   emit_arith(0x33, 0xC0, dst, src);
10385 }
10386 
10387 void Assembler::xorq(Register dst, Address src) {
10388   InstructionMark im(this);
10389   emit_int16(get_prefixq(src, dst), 0x33);
10390   emit_operand(dst, src);
10391 }
10392 
10393 #endif // !LP64