/*
 * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

// A 2-D table for managing compressed displacement (disp8) on EVEX enabled platforms.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
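
// Worked example (illustrative only): for a full-vector tuple (EVEX_FV(0)) at
// AVX_512bit the table above gives a disp8*N factor of N = 64, so a raw
// displacement of 256 is encoded as the compressed disp8 value 256/64 = 4,
// while a displacement of 260 is not a multiple of 64 and must fall back to
// the plain disp32 form. See emit_compressed_disp_byte() below for the logic.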

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
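
// Illustrative usage (a sketch; the target shown is hypothetical, not taken
// from a real call site):
//   AddressLiteral entry((address)some_runtime_entry, relocInfo::runtime_call_type);
// The literal then carries a runtime_call relocation, so the embedded address
// can be found and patched if the enclosing code blob moves.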

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  _xmmindex = xnoreg;
  _isxmmindex = false;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
  _xmmindex = xnoreg;
  _isxmmindex = false;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}
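
// Illustrative example: in the raw encoding an index of 0b100 (rsp's encoding)
// means "no index", so make_raw(0 /*rax*/, 4 /*rsp*/, 0, 8, relocInfo::none)
// yields the same operand as Address(rax, noreg, Address::no_scale, 8),
// i.e. [rax + 8] with no scaled index.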

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}
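
// Illustrative example: encode() keeps only the low three bits of the register
// number, so encode(rax) == 0 and, on 64-bit, encode(r9) == 1; the dropped
// high bit of r9 must be supplied separately by a REX/VEX/EVEX extension bit.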

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int24(op1, (op2 | encode(dst)), imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int24(op1 | 0x02,        // set sign bit
               op2 | encode(dst),
               imm32 & 0xFF);
  } else {
    emit_int16(op1, (op2 | encode(dst)));
    emit_int32(imm32);
  }
}
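
// Illustrative example: emit_arith(0x81, 0xC0, rbx, 5) takes the sign-extended
// short form because 5 fits in a signed byte, emitting 83 C3 05, whereas
// emit_arith(0x81, 0xC0, rbx, 0x12345678) emits 81 C3 followed by the
// little-endian 32-bit immediate 78 56 34 12.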

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int16(op1, (op2 | encode(dst)));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}
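
// Illustrative example: in these forms "rm" is not a real operand but an
// opcode extension carried in the reg field of the ModRM byte. addl(Address,
// int32_t) below passes rax (extension /0) and adcl(Address, int32_t) passes
// rdx (extension /2), selecting ADD vs. ADC within the 0x81 opcode group.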


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int16(op1, (op2 | encode(dst) << 3 | encode(src)));
}


bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // We will test whether the displacement fits the compressed format and, if so,
  // apply the compression to the displacement iff the result fits in 8 bits.
  if (VM_Version::supports_evex() && is_evex_inst) {
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if ((-0x80 <= new_disp && new_disp < 0x80)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return (-0x80 <= disp && disp < 0x80);
}


bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test whether the displacement fits the compressed format and, if so,
  // apply the compression to the displacement iff the result fits in 8 bits.
  if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
    int evex_encoding = _attributes->get_evex_encoding();
    int tuple_type = _attributes->get_tuple_type();
    switch (tuple_type) {
    case EVEX_FV:
      if ((evex_encoding & VEX_W) == VEX_W) {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (_attributes->get_input_size()) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    int vector_len = _attributes->get_vector_len();
    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return is8bit(disp);
}
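
// Worked example (illustrative): for an EVEX_T1S / EVEX_64bit operand (e.g.
// the memory form of addsd below), the disp8*N factor is 8. A displacement of
// 1016 compresses to 1016/8 = 127 and is emitted as a single disp8 byte, but
// 1024 compresses to 128, which does not fit in a signed byte, so the disp32
// form is used instead.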

static bool is_valid_encoding(int reg_enc) {
  return reg_enc >= 0;
}

static int raw_encode(Register reg) {
  assert(reg == noreg || reg->is_valid(), "sanity");
  int reg_enc = (intptr_t)reg;
  assert(reg_enc == -1 || is_valid_encoding(reg_enc), "sanity");
  return reg_enc;
}

static int raw_encode(XMMRegister xmmreg) {
  assert(xmmreg == xnoreg || xmmreg->is_valid(), "sanity");
  int xmmreg_enc = (intptr_t)xmmreg;
  assert(xmmreg_enc == -1 || is_valid_encoding(xmmreg_enc), "sanity");
  return xmmreg_enc;
}

static int modrm_encoding(int mod, int dst_enc, int src_enc) {
  return (mod & 3) << 6 | (dst_enc & 7) << 3 | (src_enc & 7);
}

static int sib_encoding(Address::ScaleFactor scale, int index_enc, int base_enc) {
  return (scale & 3) << 6 | (index_enc & 7) << 3 | (base_enc & 7);
}
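
// Worked example (illustrative): the operand [rdx + rcx*4 + disp32] with rax
// in the reg field encodes as modrm_encoding(0b10, 0 /*rax*/, 0b100) == 0x84,
// then sib_encoding(Address::times_4, 1 /*rcx*/, 2 /*rdx*/) == 0x8A, followed
// by the little-endian 32-bit displacement.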

inline void Assembler::emit_modrm(int mod, int dst_enc, int src_enc) {
  assert((mod & 3) != 0b11, "forbidden");
  int modrm = modrm_encoding(mod, dst_enc, src_enc);
  emit_int8(modrm);
}

inline void Assembler::emit_modrm_disp8(int mod, int dst_enc, int src_enc,
                                        int disp) {
  int modrm = modrm_encoding(mod, dst_enc, src_enc);
  emit_int16(modrm, disp & 0xFF);
}

inline void Assembler::emit_modrm_sib(int mod, int dst_enc, int src_enc,
                                      Address::ScaleFactor scale, int index_enc, int base_enc) {
  int modrm = modrm_encoding(mod, dst_enc, src_enc);
  int sib = sib_encoding(scale, index_enc, base_enc);
  emit_int16(modrm, sib);
}

inline void Assembler::emit_modrm_sib_disp8(int mod, int dst_enc, int src_enc,
                                            Address::ScaleFactor scale, int index_enc, int base_enc,
                                            int disp) {
  int modrm = modrm_encoding(mod, dst_enc, src_enc);
  int sib = sib_encoding(scale, index_enc, base_enc);
  emit_int24(modrm, sib, disp & 0xFF);
}

void Assembler::emit_operand_helper(int reg_enc, int base_enc, int index_enc,
                                    Address::ScaleFactor scale, int disp,
                                    RelocationHolder const& rspec,
                                    int rip_relative_correction) {
  bool no_relocation = (rspec.type() == relocInfo::none);

  if (is_valid_encoding(base_enc)) {
    if (is_valid_encoding(index_enc)) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && no_relocation &&
          base_enc != rbp->encoding() LP64_ONLY(&& base_enc != r13->encoding())) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        emit_modrm_sib(0b00, reg_enc, 0b100,
                       scale, index_enc, base_enc);
      } else if (emit_compressed_disp_byte(disp) && no_relocation) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        emit_modrm_sib_disp8(0b01, reg_enc, 0b100,
                             scale, index_enc, base_enc,
                             disp);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        emit_modrm_sib(0b10, reg_enc, 0b100,
                       scale, index_enc, base_enc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base_enc == rsp->encoding() LP64_ONLY(|| base_enc == r12->encoding())) {
      // [rsp + disp]
      if (disp == 0 && no_relocation) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_modrm_sib(0b00, reg_enc, 0b100,
                       Address::times_1, 0b100, 0b100);
      } else if (emit_compressed_disp_byte(disp) && no_relocation) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_modrm_sib_disp8(0b01, reg_enc, 0b100,
                             Address::times_1, 0b100, 0b100,
                             disp);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_modrm_sib(0b10, reg_enc, 0b100,
                       Address::times_1, 0b100, 0b100);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base_enc != rsp->encoding() LP64_ONLY(&& base_enc != r12->encoding()), "illegal addressing mode");
      if (disp == 0 && no_relocation &&
          base_enc != rbp->encoding() LP64_ONLY(&& base_enc != r13->encoding())) {
        // [base]
        // [00 reg base]
        emit_modrm(0, reg_enc, base_enc);
      } else if (emit_compressed_disp_byte(disp) && no_relocation) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_modrm_disp8(0b01, reg_enc, base_enc,
                         disp);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_modrm(0b10, reg_enc, base_enc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (is_valid_encoding(index_enc)) {
      assert(scale != Address::no_scale, "inconsistent address");
      // base == noreg
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      emit_modrm_sib(0b00, reg_enc, 0b100,
                     scale, index_enc, 0b101 /* no base */);
      emit_data(disp, rspec, disp32_operand);
    } else if (!no_relocation) {
      // base == noreg, index == noreg
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 reg 101] disp32

      emit_modrm(0b00, reg_enc, 0b101 /* no base */);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was computed as the target address minus the pc at the start
      // of the instruction; it needs further correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // base == noreg, index == noreg, no_relocation == true
      // 32-bit mode never did this; it handled everything via the
      // rip-rel/disp code above.
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_modrm_sib(0b00, reg_enc, 0b100 /* no base */,
                     Address::times_1, 0b100, 0b101);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
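
// Illustrative example: [rbp] (and [r13] on 64-bit) has no short mod=00 form,
// because that encoding is reserved for the no-base disp32 cases above, so it
// is emitted as [rbp + disp8] with a zero displacement: with rax in the reg
// field the operand bytes are 0x45 0x00.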

void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  assert(!index->is_valid() || index != rsp, "illegal addressing mode");
  emit_operand_helper(raw_encode(reg), raw_encode(base), raw_encode(index),
                      scale, disp, rspec, rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister xmmreg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  assert(!index->is_valid() || index != rsp, "illegal addressing mode");
  assert(xmmreg->encoding() < 16 || UseAVX > 2, "not supported");
  emit_operand_helper(raw_encode(xmmreg), raw_encode(base), raw_encode(index),
                      scale, disp, rspec);
}

void Assembler::emit_operand(XMMRegister xmmreg, Register base, XMMRegister xmmindex,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  assert(xmmreg->encoding() < 16 || UseAVX > 2, "not supported");
  assert(xmmindex->encoding() < 16 || UseAVX > 2, "not supported");
  emit_operand_helper(raw_encode(xmmreg), raw_encode(base), raw_encode(xmmindex),
                      scale, disp, rspec, /* rip_relative_correction */ 0);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x58: // addpd
    case 0x59: // mulpd
    case 0x6E: // movd
    case 0x7E: // movd
    case 0x6F: // movdq
    case 0x7F: // movdq
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
    case 0xFE: // paddd
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions,
    // but those have prefix 0x0F and are processed when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first bytes C4 and C5 alias onto the LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them, bits [7:6] are set in the VEX second byte, since a
    // ModRM byte cannot be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits the REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in the product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    int vex_opcode;
    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      vex_opcode = VEX_OPCODE_MASK & *ip;
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    } else {
      vex_opcode = VEX_OPCODE_0F;
    }
    ip++; // opcode
    // To find the end of the instruction (which == end_pc_operand).
    switch (vex_opcode) {
      case VEX_OPCODE_0F:
        switch (0xFF & *ip) {
        case 0x70: // pshufd r, r/a, #8
        case 0x71: // ps[rl|ra|ll]w r, #8
        case 0x72: // ps[rl|ra|ll]d r, #8
        case 0x73: // ps[rl|ra|ll]q r, #8
        case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
        case 0xC4: // pinsrw r, r, r/a, #8
        case 0xC5: // pextrw r/a, r, #8
        case 0xC6: // shufp[s|d] r, r, r/a, #8
          tail_size = 1;  // the imm8
          break;
        }
        break;
      case VEX_OPCODE_0F_3A:
        tail_size = 1;
        break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert(VM_Version::supports_evex(), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions, all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of the instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x22: // pinsrd r, r/a, #8
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
    case 0x1f: // evpcmpd/evpcmpq
    case 0x3f: // evpcmpb/evpcmpw
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
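
// Illustrative usage (a sketch): given the start of a 5-byte "call rel32"
// instruction at addr, locate_operand(addr, call32_operand) returns addr + 1,
// the address of the 32-bit displacement, and locate_next_instruction(addr)
// returns addr + 5.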


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  if (adr.isxmmindex()) {
    emit_operand(reg, adr._base, adr._xmmindex, adr._scale, adr._disp, adr._rspec);
  } else {
    emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
  }
}

// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}

void Assembler::addw(Register dst, Register src) {
  (void)prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addw(Address dst, int imm16) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, dst, 2);
  emit_int16(imm16);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int32(0x0F,
             0x1F,
             0x40, // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
             0);   // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int32(0x0F,
             0x1F,
             0x44,  // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
             0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);     // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int24(0x0F,
             0x1F,
             (unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int32(0x0F,
             0x1F,
             (unsigned char)0x84,
                    // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
             0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);    // 32-bits offset (4 bytes)
}
1291 
1292 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
1293   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1294   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1295   attributes.set_rex_vex_w_reverted();
1296   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1297   emit_int16(0x58, (0xC0 | encode));
1298 }
1299 
1300 void Assembler::addsd(XMMRegister dst, Address src) {
1301   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1302   InstructionMark im(this);
1303   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1304   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1305   attributes.set_rex_vex_w_reverted();
1306   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1307   emit_int8(0x58);
1308   emit_operand(dst, src);
1309 }
1310 
1311 void Assembler::addss(XMMRegister dst, XMMRegister src) {
1312   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1313   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1314   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1315   emit_int16(0x58, (0xC0 | encode));
1316 }
1317 
1318 void Assembler::addss(XMMRegister dst, Address src) {
1319   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1320   InstructionMark im(this);
1321   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1322   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1323   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1324   emit_int8(0x58);
1325   emit_operand(dst, src);
1326 }
1327 
1328 void Assembler::aesdec(XMMRegister dst, Address src) {
1329   assert(VM_Version::supports_aes(), "");
1330   InstructionMark im(this);
1331   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1332   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1333   emit_int8((unsigned char)0xDE);
1334   emit_operand(dst, src);
1335 }
1336 
1337 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1338   assert(VM_Version::supports_aes(), "");
1339   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1340   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1341   emit_int16((unsigned char)0xDE, (0xC0 | encode));
1342 }
1343 
1344 void Assembler::vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1345   assert(VM_Version::supports_avx512_vaes(), "");
1346   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1347   attributes.set_is_evex_instruction();
1348   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1349   emit_int16((unsigned char)0xDE, (0xC0 | encode));
1350 }
1351 
1352 
1353 void Assembler::aesdeclast(XMMRegister dst, Address src) {
1354   assert(VM_Version::supports_aes(), "");
1355   InstructionMark im(this);
1356   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1357   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1358   emit_int8((unsigned char)0xDF);
1359   emit_operand(dst, src);
1360 }
1361 
1362 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1363   assert(VM_Version::supports_aes(), "");
1364   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1365   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1366   emit_int16((unsigned char)0xDF, (0xC0 | encode));
1367 }
1368 
1369 void Assembler::vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1370   assert(VM_Version::supports_avx512_vaes(), "");
1371   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1372   attributes.set_is_evex_instruction();
1373   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1374   emit_int16((unsigned char)0xDF, (0xC0 | encode));
1375 }
1376 
1377 void Assembler::aesenc(XMMRegister dst, Address src) {
1378   assert(VM_Version::supports_aes(), "");
1379   InstructionMark im(this);
1380   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1381   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1382   emit_int8((unsigned char)0xDC);
1383   emit_operand(dst, src);
1384 }
1385 
1386 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1387   assert(VM_Version::supports_aes(), "");
1388   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1389   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1390   emit_int16((unsigned char)0xDC, 0xC0 | encode);
1391 }
1392 
1393 void Assembler::vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1394   assert(VM_Version::supports_avx512_vaes(), "requires vaes support/enabling");
1395   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1396   attributes.set_is_evex_instruction();
1397   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1398   emit_int16((unsigned char)0xDC, (0xC0 | encode));
1399 }
1400 
1401 void Assembler::aesenclast(XMMRegister dst, Address src) {
1402   assert(VM_Version::supports_aes(), "");
1403   InstructionMark im(this);
1404   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1405   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1406   emit_int8((unsigned char)0xDD);
1407   emit_operand(dst, src);
1408 }
1409 
1410 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1411   assert(VM_Version::supports_aes(), "");
1412   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1413   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1414   emit_int16((unsigned char)0xDD, (0xC0 | encode));
1415 }
1416 
1417 void Assembler::vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1418   assert(VM_Version::supports_avx512_vaes(), "requires vaes support/enabling");
1419   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1420   attributes.set_is_evex_instruction();
1421   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1422   emit_int16((unsigned char)0xDD, (0xC0 | encode));
1423 }
1424 
1425 void Assembler::andw(Register dst, Register src) {
1426   (void)prefix_and_encode(dst->encoding(), src->encoding());
1427   emit_arith(0x23, 0xC0, dst, src);
1428 }
1429 
1430 void Assembler::andl(Address dst, int32_t imm32) {
1431   InstructionMark im(this);
1432   prefix(dst);
1433   emit_int8((unsigned char)0x81);
1434   emit_operand(rsp, dst, 4);
1435   emit_int32(imm32);
1436 }
1437 
1438 void Assembler::andl(Register dst, int32_t imm32) {
1439   prefix(dst);
1440   emit_arith(0x81, 0xE0, dst, imm32);
1441 }
1442 
1443 void Assembler::andl(Register dst, Address src) {
1444   InstructionMark im(this);
1445   prefix(src, dst);
1446   emit_int8(0x23);
1447   emit_operand(dst, src);
1448 }
1449 
1450 void Assembler::andl(Register dst, Register src) {
1451   (void) prefix_and_encode(dst->encoding(), src->encoding());
1452   emit_arith(0x23, 0xC0, dst, src);
1453 }
1454 
1455 void Assembler::andnl(Register dst, Register src1, Register src2) {
1456   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1457   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1458   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1459   emit_int16((unsigned char)0xF2, (0xC0 | encode));
1460 }
1461 
1462 void Assembler::andnl(Register dst, Register src1, Address src2) {
1463   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1464   InstructionMark im(this);
1465   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1466   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1467   emit_int8((unsigned char)0xF2);
1468   emit_operand(dst, src2);
1469 }
1470 
1471 void Assembler::bsfl(Register dst, Register src) {
1472   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1473   emit_int24(0x0F,
1474              (unsigned char)0xBC,
1475              0xC0 | encode);
1476 }
1477 
1478 void Assembler::bsrl(Register dst, Register src) {
1479   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1480   emit_int24(0x0F,
1481              (unsigned char)0xBD,
1482              0xC0 | encode);
1483 }
1484 
1485 void Assembler::bswapl(Register reg) { // bswap
1486   int encode = prefix_and_encode(reg->encoding());
1487   emit_int16(0x0F, (0xC8 | encode));
1488 }
1489 
1490 void Assembler::blsil(Register dst, Register src) {
1491   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1492   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1493   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1494   emit_int16((unsigned char)0xF3, (0xC0 | encode));
1495 }
1496 
1497 void Assembler::blsil(Register dst, Address src) {
1498   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1499   InstructionMark im(this);
1500   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1501   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1502   emit_int8((unsigned char)0xF3);
1503   emit_operand(rbx, src);
1504 }
1505 
1506 void Assembler::blsmskl(Register dst, Register src) {
1507   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1508   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1509   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1510   emit_int16((unsigned char)0xF3,
1511              0xC0 | encode);
1512 }
1513 
1514 void Assembler::blsmskl(Register dst, Address src) {
1515   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1516   InstructionMark im(this);
1517   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1518   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1519   emit_int8((unsigned char)0xF3);
1520   emit_operand(rdx, src);
1521 }
1522 
1523 void Assembler::blsrl(Register dst, Register src) {
1524   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1525   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1526   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1527   emit_int16((unsigned char)0xF3, (0xC0 | encode));
1528 }
1529 
1530 void Assembler::blsrl(Register dst, Address src) {
1531   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
1532   InstructionMark im(this);
1533   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1534   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
1535   emit_int8((unsigned char)0xF3);
1536   emit_operand(rcx, src);
1537 }
1538 
1539 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1540   // suspect disp32 is always good
1541   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1542 
1543   if (L.is_bound()) {
1544     const int long_size = 5;
1545     int offs = (int)( target(L) - pc() );
1546     assert(offs <= 0, "assembler error");
1547     InstructionMark im(this);
1548     // 1110 1000 #32-bit disp
1549     emit_int8((unsigned char)0xE8);
1550     emit_data(offs - long_size, rtype, operand);
1551   } else {
1552     InstructionMark im(this);
1553     // 1110 1000 #32-bit disp
1554     L.add_patch_at(code(), locator());
1555 
1556     emit_int8((unsigned char)0xE8);
1557     emit_data(int(0), rtype, operand);
1558   }
1559 }
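// For illustration: the rel32 operand of E8 is relative to the end of the 5-byte
// instruction, hence the 'offs - long_size' adjustment above. A bound target 16
// bytes behind the call site yields offs == -16 and an encoded displacement of -21.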
1560 
1561 void Assembler::call(Register dst) {
1562   int encode = prefix_and_encode(dst->encoding());
1563   emit_int16((unsigned char)0xFF, (0xD0 | encode));
1564 }
1565 
1566 
1567 void Assembler::call(Address adr) {
1568   InstructionMark im(this);
1569   prefix(adr);
1570   emit_int8((unsigned char)0xFF);
1571   emit_operand(rdx, adr);
1572 }
1573 
1574 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1575   InstructionMark im(this);
1576   emit_int8((unsigned char)0xE8);
1577   intptr_t disp = entry - (pc() + sizeof(int32_t));
1578   // Entry is NULL in case of a scratch emit.
1579   assert(entry == NULL || is_simm32(disp), "disp=" INTPTR_FORMAT " must be 32bit offset (call2)", disp);
1580   // Technically, should use call32_operand, but this format is
1581   // implied by the fact that we're emitting a call instruction.
1582 
1583   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1584   emit_data((int) disp, rspec, operand);
1585 }
1586 
1587 void Assembler::cdql() {
1588   emit_int8((unsigned char)0x99);
1589 }
1590 
1591 void Assembler::cld() {
1592   emit_int8((unsigned char)0xFC);
1593 }
1594 
1595 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1596   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1597   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1598   emit_int24(0x0F,
1599              0x40 | cc,
1600              0xC0 | encode);
1601 }
1602 
1603 
1604 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1605   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1606   prefix(src, dst);
1607   emit_int16(0x0F, (0x40 | cc));
1608   emit_operand(dst, src);
1609 }
1610 
1611 void Assembler::cmpb(Address dst, int imm8) {
1612   InstructionMark im(this);
1613   prefix(dst);
1614   emit_int8((unsigned char)0x80);
1615   emit_operand(rdi, dst, 1);
1616   emit_int8(imm8);
1617 }
1618 
1619 void Assembler::cmpl(Address dst, int32_t imm32) {
1620   InstructionMark im(this);
1621   prefix(dst);
1622   emit_int8((unsigned char)0x81);
1623   emit_operand(rdi, dst, 4);
1624   emit_int32(imm32);
1625 }
1626 
1627 void Assembler::cmpl(Register dst, int32_t imm32) {
1628   prefix(dst);
1629   emit_arith(0x81, 0xF8, dst, imm32);
1630 }
1631 
1632 void Assembler::cmpl(Register dst, Register src) {
1633   (void) prefix_and_encode(dst->encoding(), src->encoding());
1634   emit_arith(0x3B, 0xC0, dst, src);
1635 }
1636 
1637 void Assembler::cmpl(Register dst, Address  src) {
1638   InstructionMark im(this);
1639   prefix(src, dst);
1640   emit_int8(0x3B);
1641   emit_operand(dst, src);
1642 }
1643 
1644 void Assembler::cmpw(Address dst, int imm16) {
1645   InstructionMark im(this);
1646   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1647   emit_int16(0x66, (unsigned char)0x81);
1648   emit_operand(rdi, dst, 2);
1649   emit_int16(imm16);
1650 }
1651 
1652 // The 32-bit cmpxchg compares the value at adr with the contents of rax;
1653 // if they are equal, reg is stored into adr, otherwise the value at adr is loaded into rax.
1654 // The ZF is set if the compared values were equal, and cleared otherwise.
1655 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1656   InstructionMark im(this);
1657   prefix(adr, reg);
1658   emit_int16(0x0F, (unsigned char)0xB1);
1659   emit_operand(reg, adr);
1660 }
1661 
1662 // The 8-bit cmpxchg compares the value at adr with the low byte of rax (al);
1663 // if they are equal, reg is stored into adr, otherwise the value at adr is loaded into al.
1664 // The ZF is set if the compared values were equal, and cleared otherwise.
1665 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1666   InstructionMark im(this);
1667   prefix(adr, reg, true);
1668   emit_int16(0x0F, (unsigned char)0xB0);
1669   emit_operand(reg, adr);
1670 }
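// A typical compare-and-swap sequence built on the above (a sketch; the register
// and address choices are the caller's) is:
//   movl(rax, expected);     // rax holds the value expected at adr
//   lock();                  // make the exchange atomic on MP systems
//   cmpxchgl(new_val, adr);  // on success ZF=1; on failure rax holds the current value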
1671 
1672 void Assembler::comisd(XMMRegister dst, Address src) {
1673   // NOTE: dbx seems to decode this as comiss even though the
1674   // 0x66 is there. Strangely, ucomisd comes out correct.
1675   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1676   InstructionMark im(this);
1677   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1678   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1679   attributes.set_rex_vex_w_reverted();
1680   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1681   emit_int8(0x2F);
1682   emit_operand(dst, src);
1683 }
1684 
1685 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1686   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1687   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1688   attributes.set_rex_vex_w_reverted();
1689   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1690   emit_int16(0x2F, (0xC0 | encode));
1691 }
1692 
1693 void Assembler::comiss(XMMRegister dst, Address src) {
1694   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1695   InstructionMark im(this);
1696   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1697   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1698   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1699   emit_int8(0x2F);
1700   emit_operand(dst, src);
1701 }
1702 
1703 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1704   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1705   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1706   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1707   emit_int16(0x2F, (0xC0 | encode));
1708 }
1709 
1710 void Assembler::cpuid() {
1711   emit_int16(0x0F, (unsigned char)0xA2);
1712 }
1713 
1714 // Opcode / Instruction                   Op/En  64-Bit Mode  Compat/Leg Mode  Description                  Implemented
1715 // F2 0F 38 F0 /r       CRC32 r32, r/m8   RM     Valid        Valid            Accumulate CRC32 on r/m8.    v
1716 // F2 REX 0F 38 F0 /r   CRC32 r32, r/m8*  RM     Valid        N.E.             Accumulate CRC32 on r/m8.    -
1717 // F2 REX.W 0F 38 F0 /r CRC32 r64, r/m8   RM     Valid        N.E.             Accumulate CRC32 on r/m8.    -
1718 //
1719 // F2 0F 38 F1 /r       CRC32 r32, r/m16  RM     Valid        Valid            Accumulate CRC32 on r/m16.   v
1720 //
1721 // F2 0F 38 F1 /r       CRC32 r32, r/m32  RM     Valid        Valid            Accumulate CRC32 on r/m32.   v
1722 //
1723 // F2 REX.W 0F 38 F1 /r CRC32 r64, r/m64  RM     Valid        N.E.             Accumulate CRC32 on r/m64.   v
1724 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1725   assert(VM_Version::supports_sse4_2(), "");
1726   int8_t w = 0x01;
1727   Prefix p = Prefix_EMPTY;
1728 
1729   emit_int8((unsigned char)0xF2);
1730   switch (sizeInBytes) {
1731   case 1:
1732     w = 0;
1733     break;
1734   case 2:
1735   case 4:
1736     break;
1737   LP64_ONLY(case 8:)
1738     // This instruction is not valid in 32-bit mode
1739     // Note:
1740     // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
1741     //
1742     // Page B-72 Vol. 2C says:
1743     //   qwreg2 to qwreg   1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
1744     //   mem64 to qwreg    1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r/m
1745     //                                                                     F0!!!
1746     // while page 3-208 Vol. 2A says:
1747     //   F2 REX.W 0F 38 F1 /r   CRC32 r64, r/m64   RM   Valid   N.E.   Accumulate CRC32 on r/m64.
1748     //
1749     // The 0 in the last opcode bit is reserved for a different flavor of this instruction:
1750     //   F2 REX.W 0F 38 F0 /r   CRC32 r64, r/m8    RM   Valid   N.E.   Accumulate CRC32 on r/m8.
1751     p = REX_W;
1752     break;
1753   default:
1754     assert(0, "Unsupported value for a sizeInBytes argument");
1755     break;
1756   }
1757   LP64_ONLY(prefix(crc, v, p);)
1758   emit_int32(0x0F,
1759              0x38,
1760              0xF0 | w,
1761              0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
1762 }
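// For illustration: crc32(rax, rbx, 4) emits F2 0F 38 F1 C3 -- w selects the F1
// opcode for the 32-bit form, and ModRM 0xC3 encodes crc = rax (0) in reg, v = rbx (3) in rm.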
1763 
1764 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1765   assert(VM_Version::supports_sse4_2(), "");
1766   InstructionMark im(this);
1767   int8_t w = 0x01;
1768   Prefix p = Prefix_EMPTY;
1769 
1770   emit_int8((int8_t)0xF2);
1771   switch (sizeInBytes) {
1772   case 1:
1773     w = 0;
1774     break;
1775   case 2:
1776   case 4:
1777     break;
1778   LP64_ONLY(case 8:)
1779     // This instruction is not valid in 32-bit mode
1780     p = REX_W;
1781     break;
1782   default:
1783     assert(0, "Unsupported value for a sizeInBytes argument");
1784     break;
1785   }
1786   LP64_ONLY(prefix(crc, adr, p);)
1787   emit_int24(0x0F, 0x38, (0xF0 | w));
1788   emit_operand(crc, adr);
1789 }
1790 
1791 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1792   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1793   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1794   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1795   emit_int16((unsigned char)0xE6, (0xC0 | encode));
1796 }
1797 
1798 void Assembler::vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
1799   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
1800   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1801   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1802   emit_int16((unsigned char)0xE6, (0xC0 | encode));
1803 }
1804 
1805 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1806   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1807   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1808   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1809   emit_int16(0x5B, (0xC0 | encode));
1810 }
1811 
1812 void Assembler::vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
1813   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
1814   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1815   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1816   emit_int16(0x5B, (0xC0 | encode));
1817 }
1818 
1819 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1820   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1821   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1822   attributes.set_rex_vex_w_reverted();
1823   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1824   emit_int16(0x5A, (0xC0 | encode));
1825 }
1826 
1827 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1828   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1829   InstructionMark im(this);
1830   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1831   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1832   attributes.set_rex_vex_w_reverted();
1833   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1834   emit_int8(0x5A);
1835   emit_operand(dst, src);
1836 }
1837 
1838 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1839   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1840   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1841   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1842   emit_int16(0x2A, (0xC0 | encode));
1843 }
1844 
1845 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1846   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1847   InstructionMark im(this);
1848   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1849   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1850   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1851   emit_int8(0x2A);
1852   emit_operand(dst, src);
1853 }
1854 
1855 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1856   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1857   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1858   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1859   emit_int16(0x2A, (0xC0 | encode));
1860 }
1861 
1862 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1863   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1864   InstructionMark im(this);
1865   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1866   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1867   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1868   emit_int8(0x2A);
1869   emit_operand(dst, src);
1870 }
1871 
1872 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1873   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1874   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1875   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1876   emit_int16(0x2A, (0xC0 | encode));
1877 }
1878 
1879 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1880   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1881   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1882   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1883   emit_int16(0x5A, (0xC0 | encode));
1884 }
1885 
1886 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1887   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1888   InstructionMark im(this);
1889   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1890   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1891   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1892   emit_int8(0x5A);
1893   emit_operand(dst, src);
1894 }
1895 
1896 
1897 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1898   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1899   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1900   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1901   emit_int16(0x2C, (0xC0 | encode));
1902 }
1903 
1904 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1905   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1906   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1907   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1908   emit_int16(0x2C, (0xC0 | encode));
1909 }
1910 
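// supports_avx512novl() means EVEX is available but AVX512VL is not, i.e. the 128-
// and 256-bit EVEX forms cannot be encoded; such instructions are emitted at
// 512-bit vector length instead.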
1911 void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
1912   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1913   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
1914   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1915   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1916   emit_int16((unsigned char)0xE6, (0xC0 | encode));
1917 }
1918 
1919 void Assembler::pabsb(XMMRegister dst, XMMRegister src) {
1920   assert(VM_Version::supports_ssse3(), "");
1921   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
1922   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1923   emit_int16(0x1C, (0xC0 | encode));
1924 }
1925 
1926 void Assembler::pabsw(XMMRegister dst, XMMRegister src) {
1927   assert(VM_Version::supports_ssse3(), "");
1928   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
1929   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1930   emit_int16(0x1D, (0xC0 | encode));
1931 }
1932 
1933 void Assembler::pabsd(XMMRegister dst, XMMRegister src) {
1934   assert(VM_Version::supports_ssse3(), "");
1935   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1936   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1937   emit_int16(0x1E, (0xC0 | encode));
1938 }
1939 
1940 void Assembler::vpabsb(XMMRegister dst, XMMRegister src, int vector_len) {
1941   assert(vector_len == AVX_128bit ? VM_Version::supports_avx()      :
1942          vector_len == AVX_256bit ? VM_Version::supports_avx2()     :
1943          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, "not supported");
1944   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
1945   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1946   emit_int16(0x1C, (0xC0 | encode));
1947 }
1948 
1949 void Assembler::vpabsw(XMMRegister dst, XMMRegister src, int vector_len) {
1950   assert(vector_len == AVX_128bit ? VM_Version::supports_avx()      :
1951          vector_len == AVX_256bit ? VM_Version::supports_avx2()     :
1952          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, "");
1953   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
1954   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1955   emit_int16(0x1D, (0xC0 | encode));
1956 }
1957 
1958 void Assembler::vpabsd(XMMRegister dst, XMMRegister src, int vector_len) {
1959   assert(vector_len == AVX_128bit ? VM_Version::supports_avx()  :
1960          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
1961          vector_len == AVX_512bit ? VM_Version::supports_evex() : false, "");
1962   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1963   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1964   emit_int16(0x1E, (0xC0 | encode));
1965 }
1966 
1967 void Assembler::evpabsq(XMMRegister dst, XMMRegister src, int vector_len) {
1968   assert(UseAVX > 2, "");
1969   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1970   attributes.set_is_evex_instruction();
1971   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1972   emit_int16(0x1F, (0xC0 | encode));
1973 }
1974 
1975 void Assembler::vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len) {
1976   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
1977   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1978   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1979   emit_int16(0x5A, (0xC0 | encode));
1980 }
1981 
1982 void Assembler::vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len) {
1983   assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
1984   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1985   attributes.set_rex_vex_w_reverted();
1986   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1987   emit_int16(0x5A, (0xC0 | encode));
1988 }
1989 
1990 void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
1991   assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
1992   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
1993   attributes.set_is_evex_instruction();
1994   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1995   emit_int16(0x5B, (0xC0 | encode));
1996 }
1997 
1998 void Assembler::evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
1999   assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
2000   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2001   attributes.set_is_evex_instruction();
2002   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2003   emit_int16((unsigned char)0xE6, (0xC0 | encode));
2004 }
2005 
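// The evpmov* down-converts below have the destination in the r/m slot and the
// source in the ModRM reg field, which is why src and dst are swapped in the
// vex_prefix_and_encode() calls.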
2006 void Assembler::evpmovwb(XMMRegister dst, XMMRegister src, int vector_len) {
2007   assert(UseAVX > 2  && VM_Version::supports_avx512bw(), "");
2008   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2009   attributes.set_is_evex_instruction();
2010   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2011   emit_int16(0x30, (0xC0 | encode));
2012 }
2013 
2014 void Assembler::evpmovdw(XMMRegister dst, XMMRegister src, int vector_len) {
2015   assert(UseAVX > 2, "");
2016   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2017   attributes.set_is_evex_instruction();
2018   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2019   emit_int16(0x33, (0xC0 | encode));
2020 }
2021 
2022 void Assembler::evpmovdb(XMMRegister dst, XMMRegister src, int vector_len) {
2023   assert(UseAVX > 2, "");
2024   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2025   attributes.set_is_evex_instruction();
2026   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2027   emit_int16(0x31, (0xC0 | encode));
2028 }
2029 
2030 void Assembler::evpmovqd(XMMRegister dst, XMMRegister src, int vector_len) {
2031   assert(UseAVX > 2, "");
2032   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2033   attributes.set_is_evex_instruction();
2034   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2035   emit_int16(0x35, (0xC0 | encode));
2036 }
2037 
2038 void Assembler::evpmovqb(XMMRegister dst, XMMRegister src, int vector_len) {
2039   assert(UseAVX > 2, "");
2040   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2041   attributes.set_is_evex_instruction();
2042   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2043   emit_int16(0x32, (0xC0 | encode));
2044 }
2045 
2046 void Assembler::evpmovqw(XMMRegister dst, XMMRegister src, int vector_len) {
2047   assert(UseAVX > 2, "");
2048   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2049   attributes.set_is_evex_instruction();
2050   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
2051   emit_int16(0x34, (0xC0 | encode));
2052 }
2053 
2054 void Assembler::decl(Address dst) {
2055   // Don't use it directly. Use MacroAssembler::decrement() instead.
2056   InstructionMark im(this);
2057   prefix(dst);
2058   emit_int8((unsigned char)0xFF);
2059   emit_operand(rcx, dst);
2060 }
2061 
2062 void Assembler::divsd(XMMRegister dst, Address src) {
2063   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2064   InstructionMark im(this);
2065   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2066   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2067   attributes.set_rex_vex_w_reverted();
2068   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2069   emit_int8(0x5E);
2070   emit_operand(dst, src);
2071 }
2072 
2073 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
2074   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2075   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2076   attributes.set_rex_vex_w_reverted();
2077   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2078   emit_int16(0x5E, (0xC0 | encode));
2079 }
2080 
2081 void Assembler::divss(XMMRegister dst, Address src) {
2082   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2083   InstructionMark im(this);
2084   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2085   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2086   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2087   emit_int8(0x5E);
2088   emit_operand(dst, src);
2089 }
2090 
2091 void Assembler::divss(XMMRegister dst, XMMRegister src) {
2092   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2093   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2094   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2095   emit_int16(0x5E, (0xC0 | encode));
2096 }
2097 
2098 void Assembler::hlt() {
2099   emit_int8((unsigned char)0xF4);
2100 }
2101 
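// idivl, divl and imull(src) are all opcode F7 with the operation chosen by the
// ModRM reg field: /7 = IDIV (0xF8 | encode), /6 = DIV (0xF0 | encode), /5 = IMUL (0xE8 | encode).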
2102 void Assembler::idivl(Register src) {
2103   int encode = prefix_and_encode(src->encoding());
2104   emit_int16((unsigned char)0xF7, (0xF8 | encode));
2105 }
2106 
2107 void Assembler::divl(Register src) { // Unsigned
2108   int encode = prefix_and_encode(src->encoding());
2109   emit_int16((unsigned char)0xF7, (0xF0 | encode));
2110 }
2111 
2112 void Assembler::imull(Register src) {
2113   int encode = prefix_and_encode(src->encoding());
2114   emit_int16((unsigned char)0xF7, (0xE8 | encode));
2115 }
2116 
2117 void Assembler::imull(Register dst, Register src) {
2118   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2119   emit_int24(0x0F,
2120              (unsigned char)0xAF,
2121              (0xC0 | encode));
2122 }
2123 
2124 
2125 void Assembler::imull(Register dst, Register src, int value) {
2126   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2127   if (is8bit(value)) {
2128     emit_int24(0x6B, (0xC0 | encode), value & 0xFF);
2129   } else {
2130     emit_int16(0x69, (0xC0 | encode));
2131     emit_int32(value);
2132   }
2133 }
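// For illustration: imull(rax, rbx, 10) fits an imm8 and emits 6B C3 0A, while
// imull(rax, rbx, 1000) needs an imm32 and emits 69 C3 E8 03 00 00.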
2134 
2135 void Assembler::imull(Register dst, Address src) {
2136   InstructionMark im(this);
2137   prefix(src, dst);
2138   emit_int16(0x0F, (unsigned char)0xAF);
2139   emit_operand(dst, src);
2140 }
2141 
2142 
2143 void Assembler::incl(Address dst) {
2144   // Don't use it directly. Use MacroAssembler::increment() instead.
2145   InstructionMark im(this);
2146   prefix(dst);
2147   emit_int8((unsigned char)0xFF);
2148   emit_operand(rax, dst);
2149 }
2150 
2151 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
2152   InstructionMark im(this);
2153   assert((0 <= cc) && (cc < 16), "illegal cc");
2154   if (L.is_bound()) {
2155     address dst = target(L);
2156     assert(dst != NULL, "jcc most probably wrong");
2157 
2158     const int short_size = 2;
2159     const int long_size = 6;
2160     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
2161     if (maybe_short && is8bit(offs - short_size)) {
2162       // 0111 tttn #8-bit disp
2163       emit_int16(0x70 | cc, (offs - short_size) & 0xFF);
2164     } else {
2165       // 0000 1111 1000 tttn #32-bit disp
2166       assert(is_simm32(offs - long_size),
2167              "must be 32bit offset (call4)");
2168       emit_int16(0x0F, (0x80 | cc));
2169       emit_int32(offs - long_size);
2170     }
2171   } else {
2172     // Note: we could eliminate conditional jumps to this jump if the
2173     //       condition is the same; however, that seems a rather unlikely case.
2174     // Note: use jccb() if the label to be bound is very close, to get
2175     //       an 8-bit displacement.
2176     L.add_patch_at(code(), locator());
2177     emit_int16(0x0F, (0x80 | cc));
2178     emit_int32(0);
2179   }
2180 }
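// Branch displacements are relative to the end of the branch instruction, hence
// the short_size/long_size adjustments above. For illustration: a bound target 16
// bytes behind the branch emits the short form with rel8 = -18 (0xEE).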
2181 
2182 void Assembler::jccb_0(Condition cc, Label& L, const char* file, int line) {
2183   if (L.is_bound()) {
2184     const int short_size = 2;
2185     address entry = target(L);
2186 #ifdef ASSERT
2187     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2188     intptr_t delta = short_branch_delta();
2189     if (delta != 0) {
2190       dist += (dist < 0 ? (-delta) : delta);
2191     }
2192     assert(is8bit(dist), "Displacement too large for a short jmp at %s:%d", file, line);
2193 #endif
2194     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2195     // 0111 tttn #8-bit disp
2196     emit_int16(0x70 | cc, (offs - short_size) & 0xFF);
2197   } else {
2198     InstructionMark im(this);
2199     L.add_patch_at(code(), locator(), file, line);
2200     emit_int16(0x70 | cc, 0);
2201   }
2202 }
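// Prefer jccb over jcc when the target is known to be within rel8 range; the
// ASSERT block above flags out-of-range short jumps in debug builds.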
2203 
2204 void Assembler::jmp(Address adr) {
2205   InstructionMark im(this);
2206   prefix(adr);
2207   emit_int8((unsigned char)0xFF);
2208   emit_operand(rsp, adr);
2209 }
2210 
2211 void Assembler::jmp(Label& L, bool maybe_short) {
2212   if (L.is_bound()) {
2213     address entry = target(L);
2214     assert(entry != NULL, "jmp most probably wrong");
2215     InstructionMark im(this);
2216     const int short_size = 2;
2217     const int long_size = 5;
2218     intptr_t offs = entry - pc();
2219     if (maybe_short && is8bit(offs - short_size)) {
2220       emit_int16((unsigned char)0xEB, ((offs - short_size) & 0xFF));
2221     } else {
2222       emit_int8((unsigned char)0xE9);
2223       emit_int32(offs - long_size);
2224     }
2225   } else {
2226     // By default, forward jumps are always 32-bit displacements, since
2227     // we can't yet know where the label will be bound.  If you're sure that
2228     // the forward jump will not run beyond 256 bytes, use jmpb to
2229     // force an 8-bit displacement.
2230     InstructionMark im(this);
2231     L.add_patch_at(code(), locator());
2232     emit_int8((unsigned char)0xE9);
2233     emit_int32(0);
2234   }
2235 }
2236 
2237 void Assembler::jmp(Register entry) {
2238   int encode = prefix_and_encode(entry->encoding());
2239   emit_int16((unsigned char)0xFF, (0xE0 | encode));
2240 }
2241 
2242 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2243   InstructionMark im(this);
2244   emit_int8((unsigned char)0xE9);
2245   assert(dest != NULL, "must have a target");
2246   intptr_t disp = dest - (pc() + sizeof(int32_t));
2247   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2248   emit_data(disp, rspec.reloc(), call32_operand);
2249 }
2250 
2251 void Assembler::jmpb_0(Label& L, const char* file, int line) {
2252   if (L.is_bound()) {
2253     const int short_size = 2;
2254     address entry = target(L);
2255     assert(entry != NULL, "jmp most probably wrong");
2256 #ifdef ASSERT
2257     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2258     intptr_t delta = short_branch_delta();
2259     if (delta != 0) {
2260       dist += (dist < 0 ? (-delta) : delta);
2261     }
2262     assert(is8bit(dist), "Displacement too large for a short jmp at %s:%d", file, line);
2263 #endif
2264     intptr_t offs = entry - pc();
2265     emit_int16((unsigned char)0xEB, (offs - short_size) & 0xFF);
2266   } else {
2267     InstructionMark im(this);
2268     L.add_patch_at(code(), locator(), file, line);
2269     emit_int16((unsigned char)0xEB, 0);
2270   }
2271 }
2272 
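// LDMXCSR is 0F AE /2 (VLDMXCSR under VEX); the /2 extension is what
// as_Register(2) supplies to emit_operand below.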
2273 void Assembler::ldmxcsr( Address src) {
2274   if (UseAVX > 0) {
2275     InstructionMark im(this);
2276     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2277     vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2278     emit_int8((unsigned char)0xAE);
2279     emit_operand(as_Register(2), src);
2280   } else {
2281     NOT_LP64(assert(VM_Version::supports_sse(), ""));
2282     InstructionMark im(this);
2283     prefix(src);
2284     emit_int16(0x0F, (unsigned char)0xAE);
2285     emit_operand(as_Register(2), src);
2286   }
2287 }
2288 
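// On LP64, leal carries a 0x67 address-size prefix so the effective-address
// computation wraps at 32 bits, matching 32-bit lea semantics.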
2289 void Assembler::leal(Register dst, Address src) {
2290   InstructionMark im(this);
2291 #ifdef _LP64
2292   emit_int8(0x67); // addr32
2293   prefix(src, dst);
2294 #endif // _LP64
2295   emit_int8((unsigned char)0x8D);
2296   emit_operand(dst, src);
2297 }
2298 
2299 void Assembler::lfence() {
2300   emit_int24(0x0F, (unsigned char)0xAE, (unsigned char)0xE8);
2301 }
2302 
2303 void Assembler::lock() {
2304   emit_int8((unsigned char)0xF0);
2305 }
2306 
2307 void Assembler::lzcntl(Register dst, Register src) {
2308   assert(VM_Version::supports_lzcnt(), "lzcnt not supported (encoding would decode as BSR)");
2309   emit_int8((unsigned char)0xF3);
2310   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2311   emit_int24(0x0F, (unsigned char)0xBD, (0xC0 | encode));
2312 }
2313 
2314 // Emit mfence instruction
2315 void Assembler::mfence() {
2316   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2317   emit_int24(0x0F, (unsigned char)0xAE, (unsigned char)0xF0);
2318 }
2319 
2320 // Emit sfence instruction
2321 void Assembler::sfence() {
2322   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2323   emit_int24(0x0F, (unsigned char)0xAE, (unsigned char)0xF8);
2324 }
2325 
2326 void Assembler::mov(Register dst, Register src) {
2327   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2328 }
2329 
2330 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2331   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2332   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2333   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2334   attributes.set_rex_vex_w_reverted();
2335   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2336   emit_int16(0x28, (0xC0 | encode));
2337 }
2338 
2339 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2340   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2341   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2342   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2343   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2344   emit_int16(0x28, (0xC0 | encode));
2345 }
2346 
2347 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2348   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2349   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2350   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2351   emit_int16(0x16, (0xC0 | encode));
2352 }
2353 
2354 void Assembler::movb(Register dst, Address src) {
2355   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2356   InstructionMark im(this);
2357   prefix(src, dst, true);
2358   emit_int8((unsigned char)0x8A);
2359   emit_operand(dst, src);
2360 }
2361 
2362 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2363   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2364   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2365   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2366   attributes.set_rex_vex_w_reverted();
2367   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2368   emit_int16(0x12, 0xC0 | encode);
2369 }
2370 
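// kmov* family: opcode 0x90 loads a mask register from k/mem, 0x91 stores it,
// 0x92 moves GPR->k and 0x93 moves k->GPR; the operand width (b/w/d/q) is
// selected by the SIMD prefix together with VEX.W.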
2371 void Assembler::kmovbl(KRegister dst, Register src) {
2372   assert(VM_Version::supports_avx512dq(), "");
2373   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2374   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2375   emit_int16((unsigned char)0x92, (0xC0 | encode));
2376 }
2377 
2378 void Assembler::kmovbl(Register dst, KRegister src) {
2379   assert(VM_Version::supports_avx512dq(), "");
2380   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2381   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2382   emit_int16((unsigned char)0x93, (0xC0 | encode));
2383 }
2384 
2385 void Assembler::kmovwl(KRegister dst, Register src) {
2386   assert(VM_Version::supports_evex(), "");
2387   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2388   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2389   emit_int16((unsigned char)0x92, (0xC0 | encode));
2390 }
2391 
2392 void Assembler::kmovwl(Register dst, KRegister src) {
2393   assert(VM_Version::supports_evex(), "");
2394   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2395   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2396   emit_int16((unsigned char)0x93, (0xC0 | encode));
2397 }
2398 
2399 void Assembler::kmovwl(KRegister dst, Address src) {
2400   assert(VM_Version::supports_evex(), "");
2401   InstructionMark im(this);
2402   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2403   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2404   emit_int8((unsigned char)0x90);
2405   emit_operand((Register)dst, src);
2406 }
2407 
2408 void Assembler::kmovdl(KRegister dst, Register src) {
2409   assert(VM_Version::supports_avx512bw(), "");
2410   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2411   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2412   emit_int16((unsigned char)0x92, (0xC0 | encode));
2413 }
2414 
2415 void Assembler::kmovdl(Register dst, KRegister src) {
2416   assert(VM_Version::supports_avx512bw(), "");
2417   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2418   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2419   emit_int16((unsigned char)0x93, (0xC0 | encode));
2420 }
2421 
2422 void Assembler::kmovql(KRegister dst, KRegister src) {
2423   assert(VM_Version::supports_avx512bw(), "");
2424   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2425   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2426   emit_int16((unsigned char)0x90, (0xC0 | encode));
2427 }
2428 
2429 void Assembler::kmovql(KRegister dst, Address src) {
2430   assert(VM_Version::supports_avx512bw(), "");
2431   InstructionMark im(this);
2432   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2433   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2434   emit_int8((unsigned char)0x90);
2435   emit_operand((Register)dst, src);
2436 }
2437 
2438 void Assembler::kmovql(Address dst, KRegister src) {
2439   assert(VM_Version::supports_avx512bw(), "");
2440   InstructionMark im(this);
2441   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2442   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2443   emit_int8((unsigned char)0x91); // 0x91 is the store form (KMOVQ m64, k); 0x90 is the load form
2444   emit_operand((Register)src, dst);
2445 }
2446 
2447 void Assembler::kmovql(KRegister dst, Register src) {
2448   assert(VM_Version::supports_avx512bw(), "");
2449   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2450   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2451   emit_int16((unsigned char)0x92, (0xC0 | encode));
2452 }
2453 
2454 void Assembler::kmovql(Register dst, KRegister src) {
2455   assert(VM_Version::supports_avx512bw(), "");
2456   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2457   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2458   emit_int16((unsigned char)0x93, (0xC0 | encode));
2459 }
2460 
2461 void Assembler::knotwl(KRegister dst, KRegister src) {
2462   assert(VM_Version::supports_evex(), "");
2463   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2464   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2465   emit_int16(0x44, (0xC0 | encode));
2466 }
2467 
2468 // Sets ZF if the OR of src1 and src2 is all zeroes; sets CF if it is all ones
2469 void Assembler::kortestbl(KRegister src1, KRegister src2) {
2470   assert(VM_Version::supports_avx512dq(), "");
2471   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2472   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2473   emit_int16((unsigned char)0x98, (0xC0 | encode));
2474 }
2475 
2476 // Sets ZF if the OR of src1 and src2 is all zeroes; sets CF if it is all ones
2477 void Assembler::kortestwl(KRegister src1, KRegister src2) {
2478   assert(VM_Version::supports_evex(), "");
2479   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2480   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2481   emit_int16((unsigned char)0x98, (0xC0 | encode));
2482 }
2483 
2484 // Sets ZF if the OR of src1 and src2 is all zeroes; sets CF if it is all ones
2485 void Assembler::kortestdl(KRegister src1, KRegister src2) {
2486   assert(VM_Version::supports_avx512bw(), "");
2487   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2488   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2489   emit_int16((unsigned char)0x98, (0xC0 | encode));
2490 }
2491 
2492 // Sets ZF if the OR of src1 and src2 is all zeroes; sets CF if it is all ones
2493 void Assembler::kortestql(KRegister src1, KRegister src2) {
2494   assert(VM_Version::supports_avx512bw(), "");
2495   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2496   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2497   emit_int16((unsigned char)0x98, (0xC0 | encode));
2498 }
2499 
2500 // Sets ZF and CF from the AND and AND-NOT of src1 and src2 (see KTEST in the SDM)
2501 void Assembler::ktestql(KRegister src1, KRegister src2) {
2502   assert(VM_Version::supports_avx512bw(), "");
2503   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2504   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2505   emit_int16((unsigned char)0x99, (0xC0 | encode));
2506 }
2507 
2508 void Assembler::ktestq(KRegister src1, KRegister src2) {
2509   assert(VM_Version::supports_avx512bw(), "");
2510   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2511   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2512   emit_int16((unsigned char)0x99, (0xC0 | encode));
2513 }
2514 
2515 void Assembler::ktestd(KRegister src1, KRegister src2) {
2516   assert(VM_Version::supports_avx512bw(), "");
2517   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2518   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2519   emit_int16((unsigned char)0x99, (0xC0 | encode));
2520 }
2521 
2522 void Assembler::movb(Address dst, int imm8) {
2523   InstructionMark im(this);
2524   prefix(dst);
2525   emit_int8((unsigned char)0xC6);
2526   emit_operand(rax, dst, 1);
2527   emit_int8(imm8);
2528 }
2529 
2530 
2531 void Assembler::movb(Address dst, Register src) {
2532   assert(src->has_byte_register(), "must have byte register");
2533   InstructionMark im(this);
2534   prefix(dst, src, true);
2535   emit_int8((unsigned char)0x88);
2536   emit_operand(src, dst);
2537 }
2538 
2539 void Assembler::movdl(XMMRegister dst, Register src) {
2540   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2541   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2542   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2543   emit_int16(0x6E, (0xC0 | encode));
2544 }
2545 
2546 void Assembler::movdl(Register dst, XMMRegister src) {
2547   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2548   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2549   // swap src/dst to get correct prefix
2550   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2551   emit_int16(0x7E, (0xC0 | encode));
2552 }
2553 
2554 void Assembler::movdl(XMMRegister dst, Address src) {
2555   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2556   InstructionMark im(this);
2557   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2558   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2559   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2560   emit_int8(0x6E);
2561   emit_operand(dst, src);
2562 }
2563 
2564 void Assembler::movdl(Address dst, XMMRegister src) {
2565   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2566   InstructionMark im(this);
2567   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2568   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2569   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2570   emit_int8(0x7E);
2571   emit_operand(src, dst);
2572 }
2573 
2574 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2575   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2576   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2577   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2578   emit_int16(0x6F, (0xC0 | encode));
2579 }
2580 
2581 void Assembler::movdqa(XMMRegister dst, Address src) {
2582   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2583   InstructionMark im(this);
2584   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2585   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2586   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2587   emit_int8(0x6F);
2588   emit_operand(dst, src);
2589 }
2590 
2591 void Assembler::movdqu(XMMRegister dst, Address src) {
2592   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2593   InstructionMark im(this);
2594   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2595   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2596   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2597   emit_int8(0x6F);
2598   emit_operand(dst, src);
2599 }
2600 
2601 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2602   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2603   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2604   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2605   emit_int16(0x6F, (0xC0 | encode));
2606 }
2607 
2608 void Assembler::movdqu(Address dst, XMMRegister src) {
2609   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2610   InstructionMark im(this);
2611   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
2612   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
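  // Zeroing-masking (EVEX.z) is not permitted on a memory destination, so the
  // store forms reset the 'clear context' (zeroing) attribute.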
2613   attributes.reset_is_clear_context();
2614   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2615   emit_int8(0x7F);
2616   emit_operand(src, dst);
2617 }
2618 
2619 // Move Unaligned 256-bit Vector
void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  assert(UseAVX > 0, "");
  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int16(0x6F, (0xC0 | encode));
}

void Assembler::vmovdqu(XMMRegister dst, Address src) {
  assert(UseAVX > 0, "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}

void Assembler::vmovdqu(Address dst, XMMRegister src) {
  assert(UseAVX > 0, "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context();
  // swap src<->dst for encoding
  assert(src != xnoreg, "sanity");
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
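
// A minimal usage sketch (an illustration, not code from this file): copying
// 32 bytes between buffers with the unaligned forms above could look like
//   __ vmovdqu(xmm0, Address(rsi, 0));   // load 32 bytes from [rsi]
//   __ vmovdqu(Address(rdi, 0), xmm0);   // store them to [rdi]
// where rsi/rdi are assumed to hold source/destination pointers and "__" is
// the usual MacroAssembler shorthand. Unlike the aligned movdqa forms, these
// do not fault on unaligned addresses.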

// Move Unaligned EVEX-enabled Vector (programmable element size: 8, 16, 32 or 64 bits)
void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int16(0x6F, (0xC0 | encode));
}

void Assembler::evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}

void Assembler::evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}

void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
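
// A hedged usage sketch: the opmask variant above enables partial (tail)
// vector loads that never touch memory past the end of an array, e.g.
//   __ kmovql(k1, rbx);  // rbx assumed to hold a bitmask of the live lanes
//   __ evmovdqub(xmm0, k1, Address(rsi, 0), /*merge*/ false, Assembler::AVX_512bit);
// Lanes whose mask bit is clear are zeroed (merge == false) or left
// unchanged (merge == true) instead of being loaded.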

void Assembler::evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}

void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}

void Assembler::evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}

void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}

void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
  // Unmasked instruction
  evmovdqul(dst, k0, src, /*merge*/ false, vector_len);
}
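
// Note: k0 is the architectural "no mask" encoding, so delegating with k0 and
// merge == false, as above, makes the masked form behave exactly like a plain
// full-width unmasked move.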

void Assembler::evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int16(0x6F, (0xC0 | encode));
}

void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
  // Unmasked instruction
  evmovdqul(dst, k0, src, /*merge*/ false, vector_len);
}

void Assembler::evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}

void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
  // Unmasked instruction
  evmovdqul(dst, k0, src, /*merge*/ true, vector_len);
}

void Assembler::evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}

void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
  // Unmasked instruction
  if (dst->encoding() == src->encoding()) return; // elide redundant self-moves
  evmovdquq(dst, k0, src, /*merge*/ false, vector_len);
}

void Assembler::evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int16(0x6F, (0xC0 | encode));
}

void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
  // Unmasked instruction
  evmovdquq(dst, k0, src, /*merge*/ false, vector_len);
}

void Assembler::evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  if (merge) {
    attributes.reset_is_clear_context();
  }
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}

void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
  // Unmasked instruction
  evmovdquq(dst, k0, src, /*merge*/ true, vector_len);
}

void Assembler::evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attributes.reset_is_clear_context();
  }
  attributes.set_is_evex_instruction();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}

// Uses zero extension on 64-bit: writing a 32-bit destination clears the upper half of the register.

void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8(0xB8 | encode);
  emit_int32(imm32);
}

void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int16((unsigned char)0x8B, (0xC0 | encode));
}

void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x8B);
  emit_operand(dst, src);
}

void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 4);
  emit_int32(imm32);
}

void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8((unsigned char)0x89);
  emit_operand(src, dst);
}
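
// Illustrative note: on 64-bit, writing a 32-bit register clears its upper
// half, so a sketch like
//   __ movl(rax, 0);
// fully zeroes rax without needing a movq and an 8-byte immediate.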

// Newer CPUs require the use of movsd and movss to avoid partial register stalls
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x12);
  emit_operand(dst, src);
}
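
// For reference, that selection is roughly of the following form (a sketch
// of MacroAssembler::movdbl(), not a verbatim copy):
//   void MacroAssembler::movdbl(XMMRegister dst, Address src) {
//     if (UseXmmLoadAndClearUpper) { movsd (dst, src); }  // newer CPUs
//     else                         { movlpd(dst, src); }  // old Opteron
//   }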

void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7E);
  emit_operand(dst, src);
}

void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD6);
  emit_operand(src, dst);
}

void Assembler::movq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xD6, (0xC0 | encode));
}

void Assembler::movq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x7E, (0xC0 | encode));
}

void Assembler::movq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x6E, (0xC0 | encode));
}

void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_int16(0x0F, (unsigned char)0xBE);
  emit_operand(dst, src);
}

void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
  emit_int24(0x0F, (unsigned char)0xBE, (0xC0 | encode));
}

void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int16(0x10, (0xC0 | encode));
}

void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x10);
  emit_operand(dst, src);
}

void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.reset_is_clear_context();
  attributes.set_rex_vex_w_reverted();
  simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int16(0x10, (0xC0 | encode));
}

void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x10);
  emit_operand(dst, src);
}

void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  attributes.reset_is_clear_context();
  simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_int16(0x0F, (unsigned char)0xBF);
  emit_operand(dst, src);
}

void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int24(0x0F, (unsigned char)0xBF, (0xC0 | encode));
}

void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);
  emit_int8(0x66); // operand-size override prefix: 16-bit operand
  prefix(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 2);
  emit_int16(imm16);
}

void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_int8(0x66); // operand-size override prefix
  prefix(src, dst);
  emit_int8((unsigned char)0x8B);
  emit_operand(dst, src);
}

void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_int8(0x66); // operand-size override prefix
  prefix(dst, src);
  emit_int8((unsigned char)0x89);
  emit_operand(src, dst);
}

void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_int16(0x0F, (unsigned char)0xB6);
  emit_operand(dst, src);
}

void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
  emit_int24(0x0F, (unsigned char)0xB6, 0xC0 | encode);
}

void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_int16(0x0F, (unsigned char)0xB7);
  emit_operand(dst, src);
}

void Assembler::movzwl(Register dst, Register src) { // movzxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int24(0x0F, (unsigned char)0xB7, 0xC0 | encode);
}

void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_int8((unsigned char)0xF7);
  emit_operand(rsp, src);
}

void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int16((unsigned char)0xF7, (0xE0 | encode));
}

void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}

void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int16(0x59, (0xC0 | encode));
}

void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}

void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int16(0x59, (0xC0 | encode));
}

void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int16((unsigned char)0xF7, (0xD8 | encode));
}

void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers, making it a
  // pain to disassemble code while debugging. If asserts are on, speed is
  // clearly not an issue, so simply use the traditional single-byte nop
  // for alignment.

  for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest of the encoding is Intel-specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    while (i >= 15) {
      // For Intel, don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_int24(0x66, 0x66, 0x66);
      addr_nop_8();
      emit_int32(0x66, 0x66, 0x66, (unsigned char)0x90);
    }
    switch (i) {
      case 14:
        emit_int8(0x66); // size prefix
      case 13:
        emit_int8(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_int32(0x66, 0x66, 0x66, (unsigned char)0x90);
        break;
      case 11:
        emit_int8(0x66); // size prefix
      case 10:
        emit_int8(0x66); // size prefix
      case 9:
        emit_int8(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
      case 2:
        emit_int8(0x66); // size prefix
      case 1:
        emit_int8((unsigned char)0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd_family()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest of the encoding is AMD-specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    while (i >= 22) {
      i -= 11;
      emit_int24(0x66, 0x66, 0x66);
      addr_nop_8();
    }
    // Generate the first nop for sizes between 12 and 21
    switch (i) {
      case 21:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 20:
      case 19:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 18:
      case 17:
        i -= 1;
        emit_int8(0x66); // size prefix
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_int8(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate the second nop for sizes between 1 and 11
    switch (i) {
      case 11:
        emit_int8(0x66); // size prefix
      case 10:
        emit_int8(0x66); // size prefix
      case 9:
        emit_int8(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
      case 2:
        emit_int8(0x66); // size prefix
      case 1:
        emit_int8((unsigned char)0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  if (UseAddressNop && VM_Version::is_zx()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for ZX
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest of the encoding is ZX-specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    while (i >= 15) {
      // For ZX, don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_int24(0x66, 0x66, 0x66);
      addr_nop_8();
      emit_int32(0x66, 0x66, 0x66, (unsigned char)0x90);
    }
    switch (i) {
      case 14:
        emit_int8(0x66); // size prefix
      case 13:
        emit_int8(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_int32(0x66, 0x66, 0x66, (unsigned char)0x90);
        break;
      case 11:
        emit_int8(0x66); // size prefix
      case 10:
        emit_int8(0x66); // size prefix
      case 9:
        emit_int8(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
      case 2:
        emit_int8(0x66); // size prefix
      case 1:
        emit_int8((unsigned char)0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  while (i > 12) {
    i -= 4;
    emit_int32(0x66, 0x66, 0x66, (unsigned char)0x90);
  }
  // 1 - 12 nops
  if (i > 8) {
    if (i > 9) {
      i -= 1;
      emit_int8(0x66);
    }
    i -= 3;
    emit_int24(0x66, 0x66, (unsigned char)0x90);
  }
  // 1 - 8 nops
  if (i > 4) {
    if (i > 6) {
      i -= 1;
      emit_int8(0x66);
    }
    i -= 3;
    emit_int24(0x66, 0x66, (unsigned char)0x90);
  }
  switch (i) {
    case 4:
      emit_int8(0x66);
    case 3:
      emit_int8(0x66);
    case 2:
      emit_int8(0x66);
    case 1:
      emit_int8((unsigned char)0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
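
// A hedged usage sketch: callers typically use nop(i) to pad code out to an
// alignment boundary, along the lines of
//   int pad = align_up(offset(), 16) - offset();  // illustrative padding calculation
//   if (pad > 0) nop(pad);
// The multi-byte forms above keep such padding cheap for the decoder.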

void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int16((unsigned char)0xF7, (0xD0 | encode));
}

void Assembler::orw(Register dst, Register src) {
  (void)prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}

void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rcx, dst, imm32);
}

void Assembler::orl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC8, dst, imm32);
}

void Assembler::orl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0B);
  emit_operand(dst, src);
}

void Assembler::orl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}

void Assembler::orl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x09);
  emit_operand(src, dst);
}

void Assembler::orb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rcx, dst, 1);
  emit_int8(imm8);
}

void Assembler::packsswb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x63, (0xC0 | encode));
}

void Assembler::vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "some form of AVX must be enabled");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x63, (0xC0 | encode));
}

void Assembler::packssdw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x6B, (0xC0 | encode));
}

void Assembler::vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "some form of AVX must be enabled");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x6B, (0xC0 | encode));
}

void Assembler::packuswb(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires 16-byte address alignment");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x67);
  emit_operand(dst, src);
}

void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x67, (0xC0 | encode));
}

void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "some form of AVX must be enabled");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x67, (0xC0 | encode));
}

void Assembler::packusdw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x2B, (0xC0 | encode));
}

void Assembler::vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "some form of AVX must be enabled");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x2B, (0xC0 | encode));
}

void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
  assert(VM_Version::supports_avx2(), "");
  assert(vector_len != AVX_128bit, "");
  // VEX.256.66.0F3A.W1 00 /r ib
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x00, (0xC0 | encode), imm8);
}

void Assembler::vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_256bit ? VM_Version::supports_avx512vl() :
         vector_len == AVX_512bit ? VM_Version::supports_evex()     : false, "not supported");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x36, (0xC0 | encode));
}

void Assembler::vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_vbmi(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16((unsigned char)0x8D, (0xC0 | encode));
}

void Assembler::vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx512vlbw() :
         vector_len == AVX_256bit ? VM_Version::supports_avx512vlbw() :
         vector_len == AVX_512bit ? VM_Version::supports_avx512bw()   : false, "not supported");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16((unsigned char)0x8D, (0xC0 | encode));
}

void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
  // VEX.NDS.256.66.0F38.W0 36 /r
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x36, (0xC0 | encode));
}

void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
  // VEX.NDS.256.66.0F38.W0 36 /r
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x36);
  emit_operand(dst, src);
}

void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
  assert(VM_Version::supports_avx2(), "");
  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x46, (0xC0 | encode), imm8);
}

void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x06, (0xC0 | encode), imm8);
}
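
// imm8 semantics for vperm2i128/vperm2f128, per the Intel SDM: bits 1:0 pick
// the source of dst[127:0] (0/1 = low/high lane of the first source, 2/3 =
// low/high lane of the second), bits 5:4 pick dst[255:128] the same way, and
// bits 3 and 7 zero the corresponding lane instead. A sketch with
// illustrative registers:
//   __ vperm2i128(xmm0, xmm1, xmm2, 0x20);
//   // => dst[127:0] = xmm1[127:0], dst[255:128] = xmm2[127:0]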

void Assembler::vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
  assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x04, (0xC0 | encode), imm8);
}

void Assembler::vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
  assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x05, (0xC0 | encode), imm8);
}

void Assembler::vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
  assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x01, (0xC0 | encode), imm8);
}

void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int16(0x76, (0xC0 | encode));
}

void Assembler::pause() {
  emit_int16((unsigned char)0xF3, (unsigned char)0x90);
}

void Assembler::ud2() {
  emit_int16(0x0F, 0x0B);
}

void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x61);
  emit_operand(dst, src);
  emit_int8(imm8);
}

void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x61, (0xC0 | encode), imm8);
}
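
// For pcmpestri, imm8 selects the comparison mode (per the Intel SDM):
// bits 1:0 the element format (unsigned/signed bytes or words), bits 3:2 the
// aggregation (equal-any, ranges, equal-each, equal-ordered), bits 5:4 the
// polarity, and bit 6 least- vs most-significant result index. The index is
// returned in rcx, with the explicit string lengths taken from rax and rdx
// (hence the fixed-register operands).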

// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x74, (0xC0 | encode));
}

void Assembler::vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
  assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(cond_encoding, (0xC0 | encode));
}

// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
  assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x74, (0xC0 | encode));
}

// In this context, kdst is written with the mask used to process the equal components
void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x74, (0xC0 | encode));
}
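
// A hedged usage sketch: the k-register result is usually consumed via mask
// logic rather than read directly, e.g. (registers and label illustrative)
//   __ evpcmpeqb(k2, xmm0, xmm1, Assembler::AVX_512bit);
//   __ kortestql(k2, k2);                    // ZF == 1 iff no byte compared equal
//   __ jcc(Assembler::notZero, found_match); // some lane matched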

void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x64);
  emit_operand(as_Register(dst_enc), src);
}

void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x64);
  emit_operand(as_Register(dst_enc), src);
}

void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x3E, (0xC0 | encode), vcc);
}

void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int24(0x3E, (0xC0 | encode), vcc);
}

void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x3E);
  emit_operand(as_Register(dst_enc), src);
  emit_int8(vcc);
}

void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_operand(as_Register(dst_enc), src);
}

void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_operand(as_Register(kdst->encoding()), src);
}

// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x75, (0xC0 | encode));
}
3837 
3838 // In this context, each dst component that compares equal is set to all ones; non-equal components are zeroed in dst
3839 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3840   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
3841   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
3842   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3843   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3844   emit_int16(0x75, (0xC0 | encode));
3845 }
3846 
3847 // In this context, kdst receives a mask with one bit set for each pair of components that compare equal
3848 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3849   assert(VM_Version::supports_avx512bw(), "");
3850   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3851   attributes.set_is_evex_instruction();
3852   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3853   emit_int16(0x75, (0xC0 | encode));
3854 }
3855 
3856 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3857   assert(VM_Version::supports_avx512bw(), "");
3858   InstructionMark im(this);
3859   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3860   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3861   attributes.set_is_evex_instruction();
3862   int dst_enc = kdst->encoding();
3863   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3864   emit_int8(0x75);
3865   emit_operand(as_Register(dst_enc), src);
3866 }
3867 
3868 // In this context, each dst component that compares equal is set to all ones; non-equal components are zeroed in dst
3869 void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
3870   assert(VM_Version::supports_sse2(), "");
3871   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3872   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3873   emit_int16(0x76, (0xC0 | encode));
3874 }
3875 
3876 // In this context, each dst component that compares equal is set to all ones; non-equal components are zeroed in dst
3877 void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3878   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
3879   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
3880   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3881   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3882   emit_int16(0x76, (0xC0 | encode));
3883 }
3884 
3885 // In this context, kdst receives a mask with one bit set for each pair of components that compare equal
3886 void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len) {
3887   assert(VM_Version::supports_evex(), "");
3888   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3889   attributes.set_is_evex_instruction();
3890   attributes.reset_is_clear_context();
3891   attributes.set_embedded_opmask_register_specifier(mask);
3892   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3893   emit_int16(0x76, (0xC0 | encode));
3894 }
3895 
3896 void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3897   assert(VM_Version::supports_evex(), "");
3898   InstructionMark im(this);
3899   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3900   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3901   attributes.set_is_evex_instruction();
3902   attributes.reset_is_clear_context();
3903   attributes.set_embedded_opmask_register_specifier(mask);
3904   int dst_enc = kdst->encoding();
3905   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3906   emit_int8(0x76);
3907   emit_operand(as_Register(dst_enc), src);
3908 }
3909 
3910 // In this context, each dst component that compares equal is set to all ones; non-equal components are zeroed in dst
3911 void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
3912   assert(VM_Version::supports_sse4_1(), "");
3913   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3914   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3915   emit_int16(0x29, (0xC0 | encode));
3916 }
3917 
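     // Generic packed-qword compare: cond_encoding selects the 0F 38 opcode (0x29 pcmpeqq, 0x37 pcmpgtq).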
3918 void Assembler::vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
3919   assert(VM_Version::supports_avx(), "");
3920   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3921   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3922   emit_int16(cond_encoding, (0xC0 | encode));
3923 }
3924 
3925 // In this context, each dst component that compares equal is set to all ones; non-equal components are zeroed in dst
3926 void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3927   assert(VM_Version::supports_avx(), "");
3928   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3929   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3930   emit_int16(0x29, (0xC0 | encode));
3931 }
3932 
3933 // In this context, kdst receives a mask with one bit set for each pair of components that compare equal
3934 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3935   assert(VM_Version::supports_evex(), "");
3936   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3937   attributes.reset_is_clear_context();
3938   attributes.set_is_evex_instruction();
3939   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3940   emit_int16(0x29, (0xC0 | encode));
3941 }
3942 
3943 // In this context, kdst receives a mask with one bit set for each pair of components that compare equal
3944 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3945   assert(VM_Version::supports_evex(), "");
3946   InstructionMark im(this);
3947   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3948   attributes.reset_is_clear_context();
3949   attributes.set_is_evex_instruction();
3950   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
3951   int dst_enc = kdst->encoding();
3952   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3953   emit_int8(0x29);
3954   emit_operand(as_Register(dst_enc), src);
3955 }
3956 
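     // VPMOVD2M / VPMOVQ2M set each opmask bit from the sign (most significant) bit of the corresponding vector element.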
3957 void Assembler::evpmovd2m(KRegister kdst, XMMRegister src, int vector_len) {
3958   assert(UseAVX > 2  && VM_Version::supports_avx512dq(), "");
3959   assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
3960   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3961   attributes.set_is_evex_instruction();
3962   int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3963   emit_int16(0x39, (0xC0 | encode));
3964 }
3965 
3966 void Assembler::evpmovq2m(KRegister kdst, XMMRegister src, int vector_len) {
3967   assert(UseAVX > 2  && VM_Version::supports_avx512dq(), "");
3968   assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
3969   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3970   attributes.set_is_evex_instruction();
3971   int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3972   emit_int16(0x39, (0xC0 | encode));
3973 }
3974 
3975 void Assembler::pcmpgtq(XMMRegister dst, XMMRegister src) {
3976   assert(VM_Version::supports_sse4_1(), "");
3977   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3978   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3979   emit_int16(0x37, (0xC0 | encode));
3980 }
3981 
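     // PMOVMSKB gathers the most significant bit of each byte of src into the low bits of dst.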
3982 void Assembler::pmovmskb(Register dst, XMMRegister src) {
3983   assert(VM_Version::supports_sse2(), "");
3984   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3985   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3986   emit_int16((unsigned char)0xD7, (0xC0 | encode));
3987 }
3988 
3989 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
3990   assert(VM_Version::supports_avx2(), "");
3991   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
3992   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3993   emit_int16((unsigned char)0xD7, (0xC0 | encode));
3994 }
3995 
3996 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3997   assert(VM_Version::supports_sse4_1(), "");
3998   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3999   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4000   emit_int24(0x16, (0xC0 | encode), imm8);
4001 }
4002 
4003 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
4004   assert(VM_Version::supports_sse4_1(), "");
4005   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4006   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4007   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4008   emit_int8(0x16);
4009   emit_operand(src, dst);
4010   emit_int8(imm8);
4011 }
4012 
4013 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
4014   assert(VM_Version::supports_sse4_1(), "");
4015   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4016   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4017   emit_int24(0x16, (0xC0 | encode), imm8);
4018 }
4019 
4020 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
4021   assert(VM_Version::supports_sse4_1(), "");
4022   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4023   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4024   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4025   emit_int8(0x16);
4026   emit_operand(src, dst);
4027   emit_int8(imm8);
4028 }
4029 
4030 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
4031   assert(VM_Version::supports_sse2(), "");
4032   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4033   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4034   emit_int24((unsigned char)0xC5, (0xC0 | encode), imm8);
4035 }
4036 
4037 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
4038   assert(VM_Version::supports_sse4_1(), "");
4039   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4040   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
4041   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4042   emit_int8(0x15);
4043   emit_operand(src, dst);
4044   emit_int8(imm8);
4045 }
4046 
4047 void Assembler::pextrb(Register dst, XMMRegister src, int imm8) {
4048   assert(VM_Version::supports_sse4_1(), "");
4049   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4050   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4051   emit_int24(0x14, (0xC0 | encode), imm8);
4052 }
4053 
4054 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
4055   assert(VM_Version::supports_sse4_1(), "");
4056   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4057   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
4058   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4059   emit_int8(0x14);
4060   emit_operand(src, dst);
4061   emit_int8(imm8);
4062 }
4063 
4064 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
4065   assert(VM_Version::supports_sse4_1(), "");
4066   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4067   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4068   emit_int24(0x22, (0xC0 | encode), imm8);
4069 }
4070 
4071 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
4072   assert(VM_Version::supports_sse4_1(), "");
4073   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4074   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4075   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4076   emit_int8(0x22);
4077   emit_operand(dst, src);
4078   emit_int8(imm8);
4079 }
4080 
4081 void Assembler::vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
4082   assert(VM_Version::supports_avx(), "");
4083   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4084   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4085   emit_int24(0x22, (0xC0 | encode), imm8);
4086 }
4087 
4088 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
4089   assert(VM_Version::supports_sse4_1(), "");
4090   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4091   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4092   emit_int24(0x22, (0xC0 | encode), imm8);
4093 }
4094 
4095 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
4096   assert(VM_Version::supports_sse4_1(), "");
4097   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4098   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4099   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4100   emit_int8(0x22);
4101   emit_operand(dst, src);
4102   emit_int8(imm8);
4103 }
4104 
4105 void Assembler::vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
4106   assert(VM_Version::supports_avx(), "");
4107   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
4108   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4109   emit_int24(0x22, (0xC0 | encode), imm8);
4110 }
4111 
4112 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
4113   assert(VM_Version::supports_sse2(), "");
4114   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4115   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4116   emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8);
4117 }
4118 
4119 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
4120   assert(VM_Version::supports_sse2(), "");
4121   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4122   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
4123   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4124   emit_int8((unsigned char)0xC4);
4125   emit_operand(dst, src);
4126   emit_int8(imm8);
4127 }
4128 
4129 void Assembler::vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
4130   assert(VM_Version::supports_avx(), "");
4131   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4132   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4133   emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8);
4134 }
4135 
4136 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
4137   assert(VM_Version::supports_sse4_1(), "");
4138   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4139   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
4140   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4141   emit_int8(0x20);
4142   emit_operand(dst, src);
4143   emit_int8(imm8);
4144 }
4145 
4146 void Assembler::pinsrb(XMMRegister dst, Register src, int imm8) {
4147   assert(VM_Version::supports_sse4_1(), "");
4148   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4149   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4150   emit_int24(0x20, (0xC0 | encode), imm8);
4151 }
4152 
4153 void Assembler::vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
4154   assert(VM_Version::supports_avx(), "");
4155   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4156   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4157   emit_int24(0x20, (0xC0 | encode), imm8);
4158 }
4159 
4160 void Assembler::insertps(XMMRegister dst, XMMRegister src, int imm8) {
4161   assert(VM_Version::supports_sse4_1(), "");
4162   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4163   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4164   emit_int24(0x21, (0xC0 | encode), imm8);
4165 }
4166 
4167 void Assembler::vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
4168   assert(VM_Version::supports_avx(), "");
4169   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4170   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4171   emit_int24(0x21, (0xC0 | encode), imm8);
4172 }
4173 
4174 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
4175   assert(VM_Version::supports_sse4_1(), "");
4176   InstructionMark im(this);
4177   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4178   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4179   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4180   emit_int8(0x30);
4181   emit_operand(dst, src);
4182 }
4183 
4184 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
4185   assert(VM_Version::supports_sse4_1(), "");
4186   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4187   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4188   emit_int16(0x30, (0xC0 | encode));
4189 }
4190 
4191 void Assembler::pmovsxbw(XMMRegister dst, XMMRegister src) {
4192   assert(VM_Version::supports_sse4_1(), "");
4193   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4194   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4195   emit_int16(0x20, (0xC0 | encode));
4196 }
4197 
4198 void Assembler::pmovzxdq(XMMRegister dst, XMMRegister src) {
4199   assert(VM_Version::supports_sse4_1(), "");
4200   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4201   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4202   emit_int16(0x35, (0xC0 | encode));
4203 }
4204 
4205 void Assembler::pmovsxbd(XMMRegister dst, XMMRegister src) {
4206   assert(VM_Version::supports_sse4_1(), "");
4207   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4208   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4209   emit_int16(0x21, (0xC0 | encode));
4210 }
4211 
4212 void Assembler::pmovzxbd(XMMRegister dst, XMMRegister src) {
4213   assert(VM_Version::supports_sse4_1(), "");
4214   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4215   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4216   emit_int16(0x31, (0xC0 | encode));
4217 }
4218 
4219 void Assembler::pmovsxbq(XMMRegister dst, XMMRegister src) {
4220   assert(VM_Version::supports_sse4_1(), "");
4221   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4222   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4223   emit_int16(0x22, (0xC0 | encode));
4224 }
4225 
4226 void Assembler::pmovsxwd(XMMRegister dst, XMMRegister src) {
4227   assert(VM_Version::supports_sse4_1(), "");
4228   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4229   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4230   emit_int16(0x23, (0xC0 | encode));
4231 }
4232 
4233 void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
4234   assert(VM_Version::supports_avx(), "");
4235   InstructionMark im(this);
4236   assert(dst != xnoreg, "sanity");
4237   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4238   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4239   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4240   emit_int8(0x30);
4241   emit_operand(dst, src);
4242 }
4243 
4244 void Assembler::vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) {
4245   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4246          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4247          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : 0, "");
4248   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4249   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4250   emit_int16(0x30, (0xC0 | encode));
4251 }
4252 
4253 void Assembler::vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len) {
4254   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4255          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4256          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : 0, "");
4257   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4258   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4259   emit_int16(0x20, (0xC0 | encode));
4260 }
4261 
4262 void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
4263   assert(VM_Version::supports_avx512vlbw(), "");
4264   assert(dst != xnoreg, "sanity");
4265   InstructionMark im(this);
4266   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
4267   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4268   attributes.set_embedded_opmask_register_specifier(mask);
4269   attributes.set_is_evex_instruction();
4270   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4271   emit_int8(0x30);
4272   emit_operand(dst, src);
4273 }
4274 
4275 void Assembler::evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
4276   assert(VM_Version::supports_evex(), "");
4277   // Encoding: EVEX.NDS.XXX.66.0F.W0 DB /r
4278   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4279   attributes.set_is_evex_instruction();
4280   attributes.set_embedded_opmask_register_specifier(mask);
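       // Zeroing-masking {z} is the default for masked ops here; for merge semantics, drop the clear-context (zeroing) attribute.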
4281   if (merge) {
4282     attributes.reset_is_clear_context();
4283   }
4284   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4285   emit_int16((unsigned char)0xDB, (0xC0 | encode));
4286 }
4287 
4288 void Assembler::vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len) {
4289   assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
4290   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4291   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4292   emit_int16(0x35, (0xC0 | encode));
4293 }
4294 
4295 void Assembler::vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len) {
4296   assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
4297   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4298   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4299   emit_int16(0x31, (0xC0 | encode));
4300 }
4301 
4302 void Assembler::vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len) {
4303   assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
4304   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4305   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4306   emit_int16(0x32, (0xC0 | encode));
4307 }
4308 
4309 void Assembler::vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len) {
4310   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4311          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4312              VM_Version::supports_evex(), "");
4313   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4314   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4315   emit_int16(0x21, (0xC0 | encode));
4316 }
4317 
4318 void Assembler::vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len) {
4319   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4320          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4321              VM_Version::supports_evex(), "");
4322   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4323   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4324   emit_int16(0x22, (0xC0 | encode));
4325 }
4326 
4327 void Assembler::vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len) {
4328   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4329          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4330              VM_Version::supports_evex(), "");
4331   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4332   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4333   emit_int16(0x23, (0xC0 | encode));
4334 }
4335 
4336 void Assembler::vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len) {
4337   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4338          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4339              VM_Version::supports_evex(), "");
4340   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4341   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4342   emit_int16(0x24, (0xC0 | encode));
4343 }
4344 
4345 void Assembler::vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len) {
4346   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4347          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4348              VM_Version::supports_evex(), "");
4349   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4350   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4351   emit_int16(0x25, (0xC0 | encode));
4352 }
4353 
4354 void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
4355   assert(VM_Version::supports_avx512vlbw(), "");
4356   assert(src != xnoreg, "sanity");
4357   InstructionMark im(this);
4358   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4359   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4360   attributes.set_is_evex_instruction();
4361   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
4362   emit_int8(0x30);
4363   emit_operand(src, dst);
4364 }
4365 
4366 void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
4367   assert(VM_Version::supports_avx512vlbw(), "");
4368   assert(src != xnoreg, "sanity");
4369   InstructionMark im(this);
4370   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4371   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
4372   attributes.reset_is_clear_context();
4373   attributes.set_embedded_opmask_register_specifier(mask);
4374   attributes.set_is_evex_instruction();
4375   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
4376   emit_int8(0x30);
4377   emit_operand(src, dst);
4378 }
4379 
4380 void Assembler::evpmovdb(Address dst, XMMRegister src, int vector_len) {
4381   assert(VM_Version::supports_evex(), "");
4382   assert(src != xnoreg, "sanity");
4383   InstructionMark im(this);
4384   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4385   attributes.set_address_attributes(/* tuple_type */ EVEX_QVM, /* input_size_in_bits */ EVEX_NObit);
4386   attributes.set_is_evex_instruction();
4387   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
4388   emit_int8(0x31);
4389   emit_operand(src, dst);
4390 }
4391 
4392 void Assembler::vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len) {
4393   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4394          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4395          vector_len == AVX_512bit ? VM_Version::supports_evex() : 0, "");
4396   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4397   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4398   emit_int16(0x33, (0xC0 | encode));
4399 }
4400 
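     // PMADDWD multiplies packed signed words and adds each adjacent pair of 32-bit products.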
4401 void Assembler::pmaddwd(XMMRegister dst, XMMRegister src) {
4402   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4403   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4404   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4405   emit_int16((unsigned char)0xF5, (0xC0 | encode));
4406 }
4407 
4408 void Assembler::vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4409   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4410     (vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4411     (vector_len == AVX_512bit ? VM_Version::supports_evex() : 0)), "");
4412   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4413   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4414   emit_int16((unsigned char)0xF5, (0xC0 | encode));
4415 }
4416 
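     // VPDPWSSD (AVX-512 VNNI) multiplies signed word pairs and accumulates the dword sums into dst.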
4417 void Assembler::evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4418   assert(VM_Version::supports_evex(), "");
4419   assert(VM_Version::supports_avx512_vnni(), "must support vnni");
4420   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4421   attributes.set_is_evex_instruction();
4422   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4423   emit_int16(0x52, (0xC0 | encode));
4424 }
4425 
4426 // generic
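     // POP r: opcode 0x58 plus the low three register bits; REX.B (via prefix_and_encode) reaches r8-r15.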
4427 void Assembler::pop(Register dst) {
4428   int encode = prefix_and_encode(dst->encoding());
4429   emit_int8(0x58 | encode);
4430 }
4431 
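     // POPCNT is encoded F3 0F B8 /r; the mandatory F3 prefix must precede any REX prefix.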
4432 void Assembler::popcntl(Register dst, Address src) {
4433   assert(VM_Version::supports_popcnt(), "must support");
4434   InstructionMark im(this);
4435   emit_int8((unsigned char)0xF3);
4436   prefix(src, dst);
4437   emit_int16(0x0F, (unsigned char)0xB8);
4438   emit_operand(dst, src);
4439 }
4440 
4441 void Assembler::popcntl(Register dst, Register src) {
4442   assert(VM_Version::supports_popcnt(), "must support");
4443   emit_int8((unsigned char)0xF3);
4444   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4445   emit_int24(0x0F, (unsigned char)0xB8, (0xC0 | encode));
4446 }
4447 
4448 void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
4449   assert(VM_Version::supports_avx512_vpopcntdq(), "must support vpopcntdq feature");
4450   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4451   attributes.set_is_evex_instruction();
4452   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4453   emit_int16(0x55, (0xC0 | encode));
4454 }
4455 
4456 void Assembler::popf() {
4457   emit_int8((unsigned char)0x9D);
4458 }
4459 
4460 #ifndef _LP64 // no 32bit push/pop on amd64
4461 void Assembler::popl(Address dst) {
4462   // NOTE: 32-bit only; on 64-bit this would adjust the stack by 8 bytes
4463   InstructionMark im(this);
4464   prefix(dst);
4465   emit_int8((unsigned char)0x8F);
4466   emit_operand(rax, dst);
4467 }
4468 #endif
4469 
4470 void Assembler::prefetchnta(Address src) {
4471   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4472   InstructionMark im(this);
4473   prefix(src);
4474   emit_int16(0x0F, 0x18);
4475   emit_operand(rax, src); // 0, src
4476 }
4477 
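     // 3DNow!/AMD PREFETCH: 0F 0D /0 (prefetch for read).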
4478 void Assembler::prefetchr(Address src) {
4479   assert(VM_Version::supports_3dnow_prefetch(), "must support");
4480   InstructionMark im(this);
4481   prefix(src);
4482   emit_int16(0x0F, 0x0D);
4483   emit_operand(rax, src); // 0, src
4484 }
4485 
4486 void Assembler::prefetcht0(Address src) {
4487   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4488   InstructionMark im(this);
4489   prefix(src);
4490   emit_int16(0x0F, 0x18);
4491   emit_operand(rcx, src); // 1, src
4492 }
4493 
4494 void Assembler::prefetcht1(Address src) {
4495   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4496   InstructionMark im(this);
4497   prefix(src);
4498   emit_int16(0x0F, 0x18);
4499   emit_operand(rdx, src); // 2, src
4500 }
4501 
4502 void Assembler::prefetcht2(Address src) {
4503   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
4504   InstructionMark im(this);
4505   prefix(src);
4506   emit_int16(0x0F, 0x18);
4507   emit_operand(rbx, src); // 3, src
4508 }
4509 
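     // PREFETCHW: 0F 0D /1 (prefetch with intent to write).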
4510 void Assembler::prefetchw(Address src) {
4511   assert(VM_Version::supports_3dnow_prefetch(), "must support");
4512   InstructionMark im(this);
4513   prefix(src);
4514   emit_int16(0x0F, 0x0D);
4515   emit_operand(rcx, src); // 1, src
4516 }
4517 
4518 void Assembler::prefix(Prefix p) {
4519   emit_int8(p);
4520 }
4521 
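     // PSHUFB replaces each dst byte with the dst byte selected by the low four bits of the
     // corresponding src byte; a set sign bit in the src byte zeroes the result byte.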
4522 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
4523   assert(VM_Version::supports_ssse3(), "");
4524   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4525   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4526   emit_int16(0x00, (0xC0 | encode));
4527 }
4528 
4529 void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4530   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4531          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4532          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : 0, "");
4533   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4534   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4535   emit_int16(0x00, (0xC0 | encode));
4536 }
4537 
4538 void Assembler::pshufb(XMMRegister dst, Address src) {
4539   assert(VM_Version::supports_ssse3(), "");
4540   InstructionMark im(this);
4541   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4542   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4543   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4544   emit_int8(0x00);
4545   emit_operand(dst, src);
4546 }
4547 
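     // PSHUFD: each 2-bit field of 'mode' selects the source dword for the corresponding destination dword.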
4548 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
4549   assert(isByte(mode), "invalid value");
4550   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4551   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
4552   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4553   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4554   emit_int24(0x70, (0xC0 | encode), mode & 0xFF);
4555 }
4556 
4557 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
4558   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4559          (vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4560          (vector_len == AVX_512bit ? VM_Version::supports_evex() : 0)), "");
4561   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4562   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4563   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4564   emit_int24(0x70, (0xC0 | encode), mode & 0xFF);
4565 }
4566 
4567 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
4568   assert(isByte(mode), "invalid value");
4569   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4570   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4571   InstructionMark im(this);
4572   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4573   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4574   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4575   emit_int8(0x70);
4576   emit_operand(dst, src);
4577   emit_int8(mode & 0xFF);
4578 }
4579 
4580 void Assembler::pshufhw(XMMRegister dst, XMMRegister src, int mode) {
4581   assert(isByte(mode), "invalid value");
4582   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4583   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4584   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4585   emit_int24(0x70, (0xC0 | encode), mode & 0xFF);
4586 }
4587 
4588 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
4589   assert(isByte(mode), "invalid value");
4590   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4591   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4592   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4593   emit_int24(0x70, (0xC0 | encode), mode & 0xFF);
4594 }
4595 
4596 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
4597   assert(isByte(mode), "invalid value");
4598   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4599   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4600   InstructionMark im(this);
4601   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4602   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4603   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4604   emit_int8(0x70);
4605   emit_operand(dst, src);
4606   emit_int8(mode & 0xFF);
4607 }
4608 
4609 void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4610   assert(VM_Version::supports_evex(), "requires EVEX support");
4611   assert(vector_len == Assembler::AVX_256bit || vector_len == Assembler::AVX_512bit, "");
4612   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4613   attributes.set_is_evex_instruction();
4614   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4615   emit_int24(0x43, (0xC0 | encode), imm8 & 0xFF);
4616 }
4617 
4618 void Assembler::pshufpd(XMMRegister dst, XMMRegister src, int imm8) {
4619   assert(isByte(imm8), "invalid value");
4620   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4621   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4622   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4623   emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
4624 }
4625 
4626 void Assembler::vpshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4627   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4628   attributes.set_rex_vex_w_reverted();
4629   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4630   emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
4631 }
4632 
4633 void Assembler::pshufps(XMMRegister dst, XMMRegister src, int imm8) {
4634   assert(isByte(imm8), "invalid value");
4635   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4636   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4637   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4638   emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
4639 }
4640 
4641 void Assembler::vpshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4642   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4643   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4644   emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
4645 }
4646 
4647 void Assembler::psrldq(XMMRegister dst, int shift) {
4648   // Shift right 128 bit value in dst XMMRegister by shift number of bytes.
4649   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4650   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
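       // XMM3 is for /3 encoding: 66 0F 73 /3 ib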
4651   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4652   emit_int24(0x73, (0xC0 | encode), shift);
4653 }
4654 
4655 void Assembler::vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4656   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4657          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4658          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : 0, "");
4659   InstructionAttr attributes(vector_len, /*vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
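       // xmm3 supplies the /3 opcode extension, as in psrldq above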
4660   int encode = vex_prefix_and_encode(xmm3->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4661   emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
4662 }
4663 
4664 void Assembler::pslldq(XMMRegister dst, int shift) {
4665   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
4666   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4667   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4668   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
4669   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4670   emit_int24(0x73, (0xC0 | encode), shift);
4671 }
4672 
4673 void Assembler::vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4674   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
4675          vector_len == AVX_256bit ? VM_Version::supports_avx2() :
4676          vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : 0, "");
4677   InstructionAttr attributes(vector_len, /*vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
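       // xmm7 supplies the /7 opcode extension, as in pslldq above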
4678   int encode = vex_prefix_and_encode(xmm7->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4679   emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
4680 }
4681 
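     // PTEST sets ZF if (dst AND src) is all zeroes and CF if (NOT dst AND src) is all zeroes;
     // only the flags are written, no register result.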
4682 void Assembler::ptest(XMMRegister dst, Address src) {
4683   assert(VM_Version::supports_sse4_1(), "");
4684   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4685   InstructionMark im(this);
4686   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4687   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4688   emit_int8(0x17);
4689   emit_operand(dst, src);
4690 }
4691 
4692 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
4693   assert(VM_Version::supports_sse4_1() || VM_Version::supports_avx(), "");
4694   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4695   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4696   emit_int16(0x17, (0xC0 | encode));
4698 }
4699 
4700 void Assembler::vptest(XMMRegister dst, Address src) {
4701   assert(VM_Version::supports_avx(), "");
4702   InstructionMark im(this);
4703   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4704   assert(dst != xnoreg, "sanity");
4705   // swap src<->dst for encoding
4706   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4707   emit_int8(0x17);
4708   emit_operand(dst, src);
4709 }
4710 
4711 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
4712   assert(VM_Version::supports_avx(), "");
4713   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4714   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4715   emit_int16(0x17, (0xC0 | encode));
4716 }
4717 
4718 void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) {
4719   assert(VM_Version::supports_avx(), "");
4720   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4721   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4722   emit_int16(0x17, (0xC0 | encode));
4723 }
4724 
4725 void Assembler::punpcklbw(XMMRegister dst, Address src) {
4726   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4727   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4728   InstructionMark im(this);
4729   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ true, /* uses_vl */ true);
4730   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4731   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4732   emit_int8(0x60);
4733   emit_operand(dst, src);
4734 }
4735 
4736 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
4737   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4738   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ true, /* uses_vl */ true);
4739   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4740   emit_int16(0x60, (0xC0 | encode));
4741 }
4742 
4743 void Assembler::punpckldq(XMMRegister dst, Address src) {
4744   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4745   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4746   InstructionMark im(this);
4747   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4748   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4749   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4750   emit_int8(0x62);
4751   emit_operand(dst, src);
4752 }
4753 
4754 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
4755   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4756   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4757   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4758   emit_int16(0x62, (0xC0 | encode));
4759 }
4760 
4761 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
4762   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4763   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4764   attributes.set_rex_vex_w_reverted();
4765   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4766   emit_int16(0x6C, (0xC0 | encode));
4767 }
4768 
4769 void Assembler::push(int32_t imm32) {
4770   // In 64-bit mode this pushes 8 bytes (the immediate is
4771   // sign-extended), but the encoding only takes a 32-bit immediate
4772   emit_int8(0x68);
4773   emit_int32(imm32);
4774 }
4775 
4776 void Assembler::push(Register src) {
4777   int encode = prefix_and_encode(src->encoding());
4778   emit_int8(0x50 | encode);
4779 }
4780 
4781 void Assembler::pushf() {
4782   emit_int8((unsigned char)0x9C);
4783 }
4784 
4785 #ifndef _LP64 // no 32bit push/pop on amd64
4786 void Assembler::pushl(Address src) {
4787   // Note this will push 64bit on 64bit
4788   InstructionMark im(this);
4789   prefix(src);
4790   emit_int8((unsigned char)0xFF);
4791   emit_operand(rsi, src);
4792 }
4793 #endif
4794 
4795 void Assembler::rcll(Register dst, int imm8) {
4796   assert(isShiftCount(imm8), "illegal shift count");
4797   int encode = prefix_and_encode(dst->encoding());
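       // RCL uses opcode extension /2: D1 /2 rotates by one, C1 /2 ib by imm8.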
4798   if (imm8 == 1) {
4799     emit_int16((unsigned char)0xD1, (0xD0 | encode));
4800   } else {
4801     emit_int24((unsigned char)0xC1, (0xD0 | encode), imm8);
4802   }
4803 }
4804 
4805 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
4806   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4807   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4808   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4809   emit_int16(0x53, (0xC0 | encode));
4810 }
4811 
4812 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
4813   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4814   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4815   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4816   emit_int16(0x53, (0xC0 | encode));
4817 }
4818 
4819 void Assembler::rdtsc() {
4820   emit_int16(0x0F, 0x31);
4821 }
4822 
4823 // copies rcx pointer-sized words from [esi] to [edi]
4824 // generic
4825 void Assembler::rep_mov() {
4826   // REP
4827   // LP64:MOVSQ, LP32:MOVSD
4828   LP64_ONLY(emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xA5);)
4829   NOT_LP64( emit_int16((unsigned char)0xF3,        (unsigned char)0xA5);)
4830 }
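
// e.g. on LP64 this emits 0xF3 0x48 0xA5 (REP MOVSQ), copying rcx
// quadwords; on 32-bit it emits 0xF3 0xA5 (REP MOVSD), copying ecx
// doublewords.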
4831 
4832 // stores the low byte of rax into rcx bytes starting at [edi]
4833 void Assembler::rep_stosb() {
4834   // REP
4835   // STOSB
4836   LP64_ONLY(emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xAA);)
4837   NOT_LP64( emit_int16((unsigned char)0xF3,        (unsigned char)0xAA);)
4838 }
4839 
4840 // stores rax into rcx pointer-sized words starting at [edi]
4841 // generic
4842 void Assembler::rep_stos() {
4843   // REP
4844   // LP64:STOSQ, LP32:STOSD
4845   LP64_ONLY(emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xAB);)
4846   NOT_LP64( emit_int16((unsigned char)0xF3,        (unsigned char)0xAB);)
4847 }
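
// e.g. on LP64 this emits 0xF3 0x48 0xAB (REP STOSQ), storing rax into
// rcx quadwords at [rdi]; rep_stosb() above instead fills rcx single
// bytes with al.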
4848 
4849 // scans rcx pointer-sized words at [edi] for an occurrence of rax
4850 // generic
4851 void Assembler::repne_scan() { // repne_scan
4852   // LP64:SCASQ, LP32:SCASD
4853   LP64_ONLY(emit_int24((unsigned char)0xF2, REX_W, (unsigned char)0xAF);)
4854   NOT_LP64( emit_int16((unsigned char)0xF2,        (unsigned char)0xAF);)
4855 }
4856 
4857 #ifdef _LP64
4858 // scans rcx 4-byte words at [edi] for an occurrence of rax
4859 // generic
4860 void Assembler::repne_scanl() { // repne_scan
4861   // SCASL
4862   emit_int16((unsigned char)0xF2, (unsigned char)0xAF);
4863 }
4864 #endif
4865 
4866 void Assembler::ret(int imm16) {
4867   if (imm16 == 0) {
4868     emit_int8((unsigned char)0xC3);
4869   } else {
4870     emit_int8((unsigned char)0xC2);
4871     emit_int16(imm16);
4872   }
4873 }
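
// e.g. ret() emits the one-byte near return 0xC3, while ret(16) emits
// 0xC2 0x10 0x00 and additionally pops 16 bytes of arguments on return.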
4874 
4875 void Assembler::sahf() {
4876 #ifdef _LP64
4877   // Not supported in 64bit mode
4878   ShouldNotReachHere();
4879 #endif
4880   emit_int8((unsigned char)0x9E);
4881 }
4882 
4883 void Assembler::sarl(Register dst, int imm8) {
4884   int encode = prefix_and_encode(dst->encoding());
4885   assert(isShiftCount(imm8), "illegal shift count");
4886   if (imm8 == 1) {
4887     emit_int16((unsigned char)0xD1, (0xF8 | encode));
4888   } else {
4889     emit_int24((unsigned char)0xC1, (0xF8 | encode), imm8);
4890   }
4891 }
4892 
4893 void Assembler::sarl(Register dst) {
4894   int encode = prefix_and_encode(dst->encoding());
4895   emit_int16((unsigned char)0xD3, (0xF8 | encode));
4896 }
4897 
4898 void Assembler::sbbl(Address dst, int32_t imm32) {
4899   InstructionMark im(this);
4900   prefix(dst);
4901   emit_arith_operand(0x81, rbx, dst, imm32);
4902 }
4903 
4904 void Assembler::sbbl(Register dst, int32_t imm32) {
4905   prefix(dst);
4906   emit_arith(0x81, 0xD8, dst, imm32);
4907 }
4908 
4909 
4910 void Assembler::sbbl(Register dst, Address src) {
4911   InstructionMark im(this);
4912   prefix(src, dst);
4913   emit_int8(0x1B);
4914   emit_operand(dst, src);
4915 }
4916 
4917 void Assembler::sbbl(Register dst, Register src) {
4918   (void) prefix_and_encode(dst->encoding(), src->encoding());
4919   emit_arith(0x1B, 0xC0, dst, src);
4920 }
4921 
4922 void Assembler::setb(Condition cc, Register dst) {
4923   assert(0 <= cc && cc < 16, "illegal cc");
4924   int encode = prefix_and_encode(dst->encoding(), true);
4925   emit_int24(0x0F, (unsigned char)0x90 | cc, (0xC0 | encode));
4926 }
4927 
4928 void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
4929   assert(VM_Version::supports_ssse3(), "");
4930   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4931   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4932   emit_int24(0x0F, (0xC0 | encode), imm8);
4933 }
4934 
4935 void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4936   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4937          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4938          0, "");
4939   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4940   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4941   emit_int24(0x0F, (0xC0 | encode), imm8);
4942 }
4943 
4944 void Assembler::evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
4945   assert(VM_Version::supports_evex(), "");
4946   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4947   attributes.set_is_evex_instruction();
4948   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4949   emit_int24(0x3, (0xC0 | encode), imm8);
4950 }
4951 
4952 void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
4953   assert(VM_Version::supports_sse4_1(), "");
4954   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4955   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4956   emit_int24(0x0E, (0xC0 | encode), imm8);
4957 }
4958 
4959 void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
4960   assert(VM_Version::supports_sha(), "");
4961   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
4962   emit_int24((unsigned char)0xCC, (0xC0 | encode), (unsigned char)imm8);
4963 }
4964 
4965 void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
4966   assert(VM_Version::supports_sha(), "");
4967   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4968   emit_int16((unsigned char)0xC8, (0xC0 | encode));
4969 }
4970 
4971 void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
4972   assert(VM_Version::supports_sha(), "");
4973   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4974   emit_int16((unsigned char)0xC9, (0xC0 | encode));
4975 }
4976 
4977 void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
4978   assert(VM_Version::supports_sha(), "");
4979   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4980   emit_int16((unsigned char)0xCA, (0xC0 | encode));
4981 }
4982 
4983 // xmm0 is an implicit additional source for this instruction.
4984 void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
4985   assert(VM_Version::supports_sha(), "");
4986   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4987   emit_int16((unsigned char)0xCB, (0xC0 | encode));
4988 }
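
// Intel defines this as SHA256RNDS2 xmm1, xmm2/m128, <XMM0>, so callers
// are expected to load the message-plus-round-constant input into xmm0
// before emitting sha256rnds2.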
4989 
4990 void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
4991   assert(VM_Version::supports_sha(), "");
4992   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4993   emit_int16((unsigned char)0xCC, (0xC0 | encode));
4994 }
4995 
4996 void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
4997   assert(VM_Version::supports_sha(), "");
4998   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4999   emit_int16((unsigned char)0xCD, (0xC0 | encode));
5000 }
5001 
5002 
5003 void Assembler::shll(Register dst, int imm8) {
5004   assert(isShiftCount(imm8), "illegal shift count");
5005   int encode = prefix_and_encode(dst->encoding());
5006   if (imm8 == 1 ) {
5007     emit_int16((unsigned char)0xD1, (0xE0 | encode));
5008   } else {
5009     emit_int24((unsigned char)0xC1, (0xE0 | encode), imm8);
5010   }
5011 }
5012 
5013 void Assembler::shll(Register dst) {
5014   int encode = prefix_and_encode(dst->encoding());
5015   emit_int16((unsigned char)0xD3, (0xE0 | encode));
5016 }
5017 
5018 void Assembler::shrl(Register dst, int imm8) {
5019   assert(isShiftCount(imm8), "illegal shift count");
5020   int encode = prefix_and_encode(dst->encoding());
5021   emit_int24((unsigned char)0xC1, (0xE8 | encode), imm8);
5022 }
5023 
5024 void Assembler::shrl(Register dst) {
5025   int encode = prefix_and_encode(dst->encoding());
5026   emit_int16((unsigned char)0xD3, (0xE8 | encode));
5027 }
5028 
5029 void Assembler::shldl(Register dst, Register src) {
5030   int encode = prefix_and_encode(src->encoding(), dst->encoding());
5031   emit_int24(0x0F, (unsigned char)0xA5, (0xC0 | encode));
5032 }
5033 
5034 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
5035   int encode = prefix_and_encode(src->encoding(), dst->encoding());
5036   emit_int32(0x0F, (unsigned char)0xA4, (0xC0 | encode), imm8);
5037 }
5038 
5039 void Assembler::shrdl(Register dst, Register src) {
5040   int encode = prefix_and_encode(src->encoding(), dst->encoding());
5041   emit_int24(0x0F, (unsigned char)0xAD, (0xC0 | encode));
5042 }
5043 
5044 void Assembler::shrdl(Register dst, Register src, int8_t imm8) {
5045   int encode = prefix_and_encode(src->encoding(), dst->encoding());
5046   emit_int32(0x0F, (unsigned char)0xAC, (0xC0 | encode), imm8);
5047 }
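
// Double-precision shifts, e.g. shldl(rax, rdx, 8) shifts rax left by 8
// and fills the vacated low bits from the top 8 bits of rdx. Note that
// src sits in the ModRM reg field and dst in r/m, which is why
// prefix_and_encode() above takes (src, dst) rather than (dst, src).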
5048 
5049 // copies a single doubleword from [esi] to [edi] (MOVSD without REP)
5050 void Assembler::smovl() {
5051   emit_int8((unsigned char)0xA5);
5052 }
5053 
5054 void Assembler::roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) {
5055   assert(VM_Version::supports_sse4_1(), "");
5056   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5057   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5058   emit_int24(0x0B, (0xC0 | encode), (unsigned char)rmode);
5059 }
5060 
5061 void Assembler::roundsd(XMMRegister dst, Address src, int32_t rmode) {
5062   assert(VM_Version::supports_sse4_1(), "");
5063   InstructionMark im(this);
5064   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5065   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5066   emit_int8(0x0B);
5067   emit_operand(dst, src);
5068   emit_int8((unsigned char)rmode);
5069 }
5070 
5071 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
5072   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5073   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5074   attributes.set_rex_vex_w_reverted();
5075   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5076   emit_int16(0x51, (0xC0 | encode));
5077 }
5078 
5079 void Assembler::sqrtsd(XMMRegister dst, Address src) {
5080   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5081   InstructionMark im(this);
5082   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5083   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5084   attributes.set_rex_vex_w_reverted();
5085   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5086   emit_int8(0x51);
5087   emit_operand(dst, src);
5088 }
5089 
5090 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
5091   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5092   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5093   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5094   emit_int16(0x51, (0xC0 | encode));
5095 }
5096 
5097 void Assembler::std() {
5098   emit_int8((unsigned char)0xFD);
5099 }
5100 
5101 void Assembler::sqrtss(XMMRegister dst, Address src) {
5102   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5103   InstructionMark im(this);
5104   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5105   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5106   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5107   emit_int8(0x51);
5108   emit_operand(dst, src);
5109 }
5110 
5111 void Assembler::stmxcsr(Address dst) {
5112   if (UseAVX > 0) {
5113     assert(VM_Version::supports_avx(), "");
5114     InstructionMark im(this);
5115     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5116     vex_prefix(dst, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5117     emit_int8((unsigned char)0xAE);
5118     emit_operand(as_Register(3), dst);
5119   } else {
5120     NOT_LP64(assert(VM_Version::supports_sse(), ""));
5121     InstructionMark im(this);
5122     prefix(dst);
5123     emit_int16(0x0F, (unsigned char)0xAE);
5124     emit_operand(as_Register(3), dst);
5125   }
5126 }
5127 
5128 void Assembler::subl(Address dst, int32_t imm32) {
5129   InstructionMark im(this);
5130   prefix(dst);
5131   emit_arith_operand(0x81, rbp, dst, imm32);
5132 }
5133 
5134 void Assembler::subl(Address dst, Register src) {
5135   InstructionMark im(this);
5136   prefix(dst, src);
5137   emit_int8(0x29);
5138   emit_operand(src, dst);
5139 }
5140 
5141 void Assembler::subl(Register dst, int32_t imm32) {
5142   prefix(dst);
5143   emit_arith(0x81, 0xE8, dst, imm32);
5144 }
5145 
5146 // Force generation of a 4-byte immediate value even if it fits into 8 bits
5147 void Assembler::subl_imm32(Register dst, int32_t imm32) {
5148   prefix(dst);
5149   emit_arith_imm32(0x81, 0xE8, dst, imm32);
5150 }
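
// e.g. subl(rbx, 16) would pick the short form 0x83 0xEB 0x10, whereas
// subl_imm32(rbx, 16) emits 0x81 0xEB 0x10 0x00 0x00 0x00, keeping the
// instruction size fixed so the immediate can be patched later.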
5151 
5152 void Assembler::subl(Register dst, Address src) {
5153   InstructionMark im(this);
5154   prefix(src, dst);
5155   emit_int8(0x2B);
5156   emit_operand(dst, src);
5157 }
5158 
5159 void Assembler::subl(Register dst, Register src) {
5160   (void) prefix_and_encode(dst->encoding(), src->encoding());
5161   emit_arith(0x2B, 0xC0, dst, src);
5162 }
5163 
5164 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
5165   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5166   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5167   attributes.set_rex_vex_w_reverted();
5168   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5169   emit_int16(0x5C, (0xC0 | encode));
5170 }
5171 
5172 void Assembler::subsd(XMMRegister dst, Address src) {
5173   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5174   InstructionMark im(this);
5175   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5176   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5177   attributes.set_rex_vex_w_reverted();
5178   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5179   emit_int8(0x5C);
5180   emit_operand(dst, src);
5181 }
5182 
5183 void Assembler::subss(XMMRegister dst, XMMRegister src) {
5184   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5185   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true , /* uses_vl */ false);
5186   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5187   emit_int16(0x5C, (0xC0 | encode));
5188 }
5189 
5190 void Assembler::subss(XMMRegister dst, Address src) {
5191   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5192   InstructionMark im(this);
5193   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5194   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5195   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5196   emit_int8(0x5C);
5197   emit_operand(dst, src);
5198 }
5199 
5200 void Assembler::testb(Register dst, int imm8) {
5201   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
5202   (void) prefix_and_encode(dst->encoding(), true);
5203   emit_arith_b(0xF6, 0xC0, dst, imm8);
5204 }
5205 
5206 void Assembler::testb(Address dst, int imm8) {
5207   InstructionMark im(this);
5208   prefix(dst);
5209   emit_int8((unsigned char)0xF6);
5210   emit_operand(rax, dst, 1);
5211   emit_int8(imm8);
5212 }
5213 
5214 void Assembler::testl(Register dst, int32_t imm32) {
5215   // not using emit_arith because test
5216   // doesn't support sign-extension of
5217   // 8-bit operands
5218   int encode = dst->encoding();
5219   if (encode == 0) {
5220     emit_int8((unsigned char)0xA9);
5221   } else {
5222     encode = prefix_and_encode(encode);
5223     emit_int16((unsigned char)0xF7, (0xC0 | encode));
5224   }
5225   emit_int32(imm32);
5226 }
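
// e.g. testl(rax, 0x1000) uses the short rax-only form 0xA9 followed by
// the imm32, one byte smaller than the general 0xF7 /0 encoding used
// for every other register.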
5227 
5228 void Assembler::testl(Register dst, Register src) {
5229   (void) prefix_and_encode(dst->encoding(), src->encoding());
5230   emit_arith(0x85, 0xC0, dst, src);
5231 }
5232 
5233 void Assembler::testl(Register dst, Address src) {
5234   InstructionMark im(this);
5235   prefix(src, dst);
5236   emit_int8((unsigned char)0x85);
5237   emit_operand(dst, src);
5238 }
5239 
5240 void Assembler::tzcntl(Register dst, Register src) {
5241   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
5242   emit_int8((unsigned char)0xF3);
5243   int encode = prefix_and_encode(dst->encoding(), src->encoding());
5244   emit_int24(0x0F,
5245              (unsigned char)0xBC,
5246              0xC0 | encode);
5247 }
5248 
5249 void Assembler::tzcntq(Register dst, Register src) {
5250   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
5251   emit_int8((unsigned char)0xF3);
5252   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5253   emit_int24(0x0F, (unsigned char)0xBC, (0xC0 | encode));
5254 }
5255 
5256 void Assembler::ucomisd(XMMRegister dst, Address src) {
5257   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5258   InstructionMark im(this);
5259   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5260   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5261   attributes.set_rex_vex_w_reverted();
5262   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5263   emit_int8(0x2E);
5264   emit_operand(dst, src);
5265 }
5266 
5267 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
5268   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5269   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5270   attributes.set_rex_vex_w_reverted();
5271   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5272   emit_int16(0x2E, (0xC0 | encode));
5273 }
5274 
5275 void Assembler::ucomiss(XMMRegister dst, Address src) {
5276   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5277   InstructionMark im(this);
5278   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5279   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5280   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5281   emit_int8(0x2E);
5282   emit_operand(dst, src);
5283 }
5284 
5285 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
5286   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5287   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5288   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5289   emit_int16(0x2E, (0xC0 | encode));
5290 }
5291 
5292 void Assembler::xabort(int8_t imm8) {
5293   emit_int24((unsigned char)0xC6, (unsigned char)0xF8, (imm8 & 0xFF));
5294 }
5295 
5296 void Assembler::xaddb(Address dst, Register src) {
5297   InstructionMark im(this);
5298   prefix(dst, src, true);
5299   emit_int16(0x0F, (unsigned char)0xC0);
5300   emit_operand(src, dst);
5301 }
5302 
5303 void Assembler::xaddw(Address dst, Register src) {
5304   InstructionMark im(this);
5305   emit_int8(0x66);
5306   prefix(dst, src);
5307   emit_int16(0x0F, (unsigned char)0xC1);
5308   emit_operand(src, dst);
5309 }
5310 
5311 void Assembler::xaddl(Address dst, Register src) {
5312   InstructionMark im(this);
5313   prefix(dst, src);
5314   emit_int16(0x0F, (unsigned char)0xC1);
5315   emit_operand(src, dst);
5316 }
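
// xadd stores dst+src to the destination and the old destination value
// into src, so prefixed with lock() it gives an atomic fetch-and-add:
// e.g. lock(); xaddl(Address(rdx, 0), rax) leaves the previous value of
// the memory word in rax.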
5317 
5318 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
5319   InstructionMark im(this);
5320   relocate(rtype);
5321   if (abort.is_bound()) {
5322     address entry = target(abort);
5323     assert(entry != NULL, "abort entry NULL");
5324     intptr_t offset = entry - pc();
5325     emit_int16((unsigned char)0xC7, (unsigned char)0xF8);
5326     emit_int32(offset - 6); // 2 opcode + 4 address
5327   } else {
5328     abort.add_patch_at(code(), locator());
5329     emit_int16((unsigned char)0xC7, (unsigned char)0xF8);
5330     emit_int32(0);
5331   }
5332 }
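
// The rel32 is relative to the end of this 6-byte instruction (2 opcode
// bytes + 4 displacement bytes), hence the (offset - 6) adjustment when
// the abort label is already bound.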
5333 
5334 void Assembler::xchgb(Register dst, Address src) { // xchg
5335   InstructionMark im(this);
5336   prefix(src, dst, true);
5337   emit_int8((unsigned char)0x86);
5338   emit_operand(dst, src);
5339 }
5340 
5341 void Assembler::xchgw(Register dst, Address src) { // xchg
5342   InstructionMark im(this);
5343   emit_int8(0x66);
5344   prefix(src, dst);
5345   emit_int8((unsigned char)0x87);
5346   emit_operand(dst, src);
5347 }
5348 
5349 void Assembler::xchgl(Register dst, Address src) { // xchg
5350   InstructionMark im(this);
5351   prefix(src, dst);
5352   emit_int8((unsigned char)0x87);
5353   emit_operand(dst, src);
5354 }
5355 
5356 void Assembler::xchgl(Register dst, Register src) {
5357   int encode = prefix_and_encode(dst->encoding(), src->encoding());
5358   emit_int16((unsigned char)0x87, (0xC0 | encode));
5359 }
5360 
5361 void Assembler::xend() {
5362   emit_int24(0x0F, 0x01, (unsigned char)0xD5);
5363 }
5364 
5365 void Assembler::xgetbv() {
5366   emit_int24(0x0F, 0x01, (unsigned char)0xD0);
5367 }
5368 
5369 void Assembler::xorl(Register dst, int32_t imm32) {
5370   prefix(dst);
5371   emit_arith(0x81, 0xF0, dst, imm32);
5372 }
5373 
5374 void Assembler::xorl(Register dst, Address src) {
5375   InstructionMark im(this);
5376   prefix(src, dst);
5377   emit_int8(0x33);
5378   emit_operand(dst, src);
5379 }
5380 
5381 void Assembler::xorl(Register dst, Register src) {
5382   (void) prefix_and_encode(dst->encoding(), src->encoding());
5383   emit_arith(0x33, 0xC0, dst, src);
5384 }
5385 
5386 void Assembler::xorb(Register dst, Address src) {
5387   InstructionMark im(this);
5388   prefix(src, dst);
5389   emit_int8(0x32);
5390   emit_operand(dst, src);
5391 }
5392 
5393 void Assembler::xorw(Register dst, Register src) {
5394   (void)prefix_and_encode(dst->encoding(), src->encoding());
5395   emit_arith(0x33, 0xC0, dst, src);
5396 }
5397 
5398 // AVX 3-operand scalar floating-point arithmetic instructions
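//
// Unlike the two-operand SSE forms these are non-destructive, e.g.
// vaddsd(xmm1, xmm2, xmm3) computes xmm1 = xmm2 + xmm3, whereas
// addsd(xmm1, xmm2) computes xmm1 += xmm2. For the scalar forms the
// upper bits of dst are copied from nds.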
5399 
5400 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
5401   assert(VM_Version::supports_avx(), "");
5402   InstructionMark im(this);
5403   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5404   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5405   attributes.set_rex_vex_w_reverted();
5406   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5407   emit_int8(0x58);
5408   emit_operand(dst, src);
5409 }
5410 
5411 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5412   assert(VM_Version::supports_avx(), "");
5413   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5414   attributes.set_rex_vex_w_reverted();
5415   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5416   emit_int16(0x58, (0xC0 | encode));
5417 }
5418 
5419 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
5420   assert(VM_Version::supports_avx(), "");
5421   InstructionMark im(this);
5422   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5423   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5424   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5425   emit_int8(0x58);
5426   emit_operand(dst, src);
5427 }
5428 
5429 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5430   assert(VM_Version::supports_avx(), "");
5431   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5432   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5433   emit_int16(0x58, (0xC0 | encode));
5434 }
5435 
5436 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
5437   assert(VM_Version::supports_avx(), "");
5438   InstructionMark im(this);
5439   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5440   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5441   attributes.set_rex_vex_w_reverted();
5442   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5443   emit_int8(0x5E);
5444   emit_operand(dst, src);
5445 }
5446 
5447 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5448   assert(VM_Version::supports_avx(), "");
5449   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5450   attributes.set_rex_vex_w_reverted();
5451   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5452   emit_int16(0x5E, (0xC0 | encode));
5453 }
5454 
5455 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
5456   assert(VM_Version::supports_avx(), "");
5457   InstructionMark im(this);
5458   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5459   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5460   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5461   emit_int8(0x5E);
5462   emit_operand(dst, src);
5463 }
5464 
5465 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5466   assert(VM_Version::supports_avx(), "");
5467   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5468   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5469   emit_int16(0x5E, (0xC0 | encode));
5470 }
5471 
5472 void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
5473   assert(VM_Version::supports_fma(), "");
5474   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5475   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5476   emit_int16((unsigned char)0xB9, (0xC0 | encode));
5477 }
5478 
5479 void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
5480   assert(VM_Version::supports_fma(), "");
5481   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5482   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5483   emit_int16((unsigned char)0xB9, (0xC0 | encode));
5484 }
5485 
5486 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
5487   assert(VM_Version::supports_avx(), "");
5488   InstructionMark im(this);
5489   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5490   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5491   attributes.set_rex_vex_w_reverted();
5492   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5493   emit_int8(0x59);
5494   emit_operand(dst, src);
5495 }
5496 
5497 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5498   assert(VM_Version::supports_avx(), "");
5499   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5500   attributes.set_rex_vex_w_reverted();
5501   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5502   emit_int16(0x59, (0xC0 | encode));
5503 }
5504 
5505 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
5506   assert(VM_Version::supports_avx(), "");
5507   InstructionMark im(this);
5508   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5509   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5510   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5511   emit_int8(0x59);
5512   emit_operand(dst, src);
5513 }
5514 
5515 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5516   assert(VM_Version::supports_avx(), "");
5517   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5518   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5519   emit_int16(0x59, (0xC0 | encode));
5520 }
5521 
5522 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
5523   assert(VM_Version::supports_avx(), "");
5524   InstructionMark im(this);
5525   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5526   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
5527   attributes.set_rex_vex_w_reverted();
5528   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5529   emit_int8(0x5C);
5530   emit_operand(dst, src);
5531 }
5532 
5533 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5534   assert(VM_Version::supports_avx(), "");
5535   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5536   attributes.set_rex_vex_w_reverted();
5537   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
5538   emit_int16(0x5C, (0xC0 | encode));
5539 }
5540 
5541 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
5542   assert(VM_Version::supports_avx(), "");
5543   InstructionMark im(this);
5544   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5545   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
5546   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5547   emit_int8(0x5C);
5548   emit_operand(dst, src);
5549 }
5550 
5551 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5552   assert(VM_Version::supports_avx(), "");
5553   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
5554   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
5555   emit_int16(0x5C, (0xC0 | encode));
5556 }
5557 
5558 //====================VECTOR ARITHMETIC=====================================
5559 
5560 // Floating-point vector arithmetic
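//
// vector_len selects the operand width: AVX_128bit (xmm), AVX_256bit
// (ymm) or AVX_512bit (zmm); e.g. vaddpd with AVX_256bit adds four
// packed doubles at once.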
5561 
5562 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
5563   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5564   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5565   attributes.set_rex_vex_w_reverted();
5566   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5567   emit_int16(0x58, (0xC0 | encode));
5568 }
5569 
5570 void Assembler::addpd(XMMRegister dst, Address src) {
5571   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5572   InstructionMark im(this);
5573   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5574   attributes.set_rex_vex_w_reverted();
5575   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5576   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5577   emit_int8(0x58);
5578   emit_operand(dst, src);
5579 }
5580 
5581 
5582 void Assembler::addps(XMMRegister dst, XMMRegister src) {
5583   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5584   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5585   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5586   emit_int16(0x58, (0xC0 | encode));
5587 }
5588 
5589 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5590   assert(VM_Version::supports_avx(), "");
5591   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5592   attributes.set_rex_vex_w_reverted();
5593   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5594   emit_int16(0x58, (0xC0 | encode));
5595 }
5596 
5597 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5598   assert(VM_Version::supports_avx(), "");
5599   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5600   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5601   emit_int16(0x58, (0xC0 | encode));
5602 }
5603 
5604 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5605   assert(VM_Version::supports_avx(), "");
5606   InstructionMark im(this);
5607   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5608   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5609   attributes.set_rex_vex_w_reverted();
5610   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5611   emit_int8(0x58);
5612   emit_operand(dst, src);
5613 }
5614 
5615 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5616   assert(VM_Version::supports_avx(), "");
5617   InstructionMark im(this);
5618   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5619   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5620   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5621   emit_int8(0x58);
5622   emit_operand(dst, src);
5623 }
5624 
5625 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
5626   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5627   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5628   attributes.set_rex_vex_w_reverted();
5629   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5630   emit_int16(0x5C, (0xC0 | encode));
5631 }
5632 
5633 void Assembler::subps(XMMRegister dst, XMMRegister src) {
5634   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5635   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5636   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5637   emit_int16(0x5C, (0xC0 | encode));
5638 }
5639 
5640 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5641   assert(VM_Version::supports_avx(), "");
5642   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5643   attributes.set_rex_vex_w_reverted();
5644   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5645   emit_int16(0x5C, (0xC0 | encode));
5646 }
5647 
5648 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5649   assert(VM_Version::supports_avx(), "");
5650   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5651   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5652   emit_int16(0x5C, (0xC0 | encode));
5653 }
5654 
5655 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5656   assert(VM_Version::supports_avx(), "");
5657   InstructionMark im(this);
5658   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5659   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5660   attributes.set_rex_vex_w_reverted();
5661   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5662   emit_int8(0x5C);
5663   emit_operand(dst, src);
5664 }
5665 
5666 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5667   assert(VM_Version::supports_avx(), "");
5668   InstructionMark im(this);
5669   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5670   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5671   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5672   emit_int8(0x5C);
5673   emit_operand(dst, src);
5674 }
5675 
5676 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
5677   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5678   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5679   attributes.set_rex_vex_w_reverted();
5680   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5681   emit_int16(0x59, (0xC0 | encode));
5682 }
5683 
5684 void Assembler::mulpd(XMMRegister dst, Address src) {
5685   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5686   InstructionMark im(this);
5687   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5688   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5689   attributes.set_rex_vex_w_reverted();
5690   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5691   emit_int8(0x59);
5692   emit_operand(dst, src);
5693 }
5694 
5695 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
5696   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5697   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5698   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5699   emit_int16(0x59, (0xC0 | encode));
5700 }
5701 
5702 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5703   assert(VM_Version::supports_avx(), "");
5704   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5705   attributes.set_rex_vex_w_reverted();
5706   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5707   emit_int16(0x59, (0xC0 | encode));
5708 }
5709 
5710 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5711   assert(VM_Version::supports_avx(), "");
5712   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5713   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5714   emit_int16(0x59, (0xC0 | encode));
5715 }
5716 
5717 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5718   assert(VM_Version::supports_avx(), "");
5719   InstructionMark im(this);
5720   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5721   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5722   attributes.set_rex_vex_w_reverted();
5723   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5724   emit_int8(0x59);
5725   emit_operand(dst, src);
5726 }
5727 
5728 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5729   assert(VM_Version::supports_avx(), "");
5730   InstructionMark im(this);
5731   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5732   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5733   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5734   emit_int8(0x59);
5735   emit_operand(dst, src);
5736 }
5737 
5738 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5739   assert(VM_Version::supports_fma(), "");
5740   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5741   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5742   emit_int16((unsigned char)0xB8, (0xC0 | encode));
5743 }
5744 
5745 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5746   assert(VM_Version::supports_fma(), "");
5747   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5748   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5749   emit_int16((unsigned char)0xB8, (0xC0 | encode));
5750 }
5751 
5752 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5753   assert(VM_Version::supports_fma(), "");
5754   InstructionMark im(this);
5755   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5756   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5757   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5758   emit_int8((unsigned char)0xB8);
5759   emit_operand(dst, src2);
5760 }
5761 
5762 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5763   assert(VM_Version::supports_fma(), "");
5764   InstructionMark im(this);
5765   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5766   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5767   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5768   emit_int8((unsigned char)0xB8);
5769   emit_operand(dst, src2);
5770 }
5771 
5772 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
5773   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5774   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5775   attributes.set_rex_vex_w_reverted();
5776   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5777   emit_int16(0x5E, (0xC0 | encode));
5778 }
5779 
5780 void Assembler::divps(XMMRegister dst, XMMRegister src) {
5781   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5782   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5783   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5784   emit_int16(0x5E, (0xC0 | encode));
5785 }
5786 
5787 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5788   assert(VM_Version::supports_avx(), "");
5789   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5790   attributes.set_rex_vex_w_reverted();
5791   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5792   emit_int16(0x5E, (0xC0 | encode));
5793 }
5794 
5795 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5796   assert(VM_Version::supports_avx(), "");
5797   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5798   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5799   emit_int16(0x5E, (0xC0 | encode));
5800 }
5801 
5802 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5803   assert(VM_Version::supports_avx(), "");
5804   InstructionMark im(this);
5805   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5806   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5807   attributes.set_rex_vex_w_reverted();
5808   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5809   emit_int8(0x5E);
5810   emit_operand(dst, src);
5811 }
5812 
5813 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5814   assert(VM_Version::supports_avx(), "");
5815   InstructionMark im(this);
5816   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5817   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5818   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5819   emit_int8(0x5E);
5820   emit_operand(dst, src);
5821 }
5822 
5823 void Assembler::vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len) {
5824   assert(VM_Version::supports_avx(), "");
5825   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5826   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5827   emit_int24(0x09, (0xC0 | encode), (rmode));
5828 }
5829 
5830 void Assembler::vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len) {
5831   assert(VM_Version::supports_avx(), "");
5832   InstructionMark im(this);
5833   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5834   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5835   emit_int8(0x09);
5836   emit_operand(dst, src);
5837   emit_int8((rmode));
5838 }
5839 
5840 void Assembler::vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len) {
5841   assert(VM_Version::supports_evex(), "requires EVEX support");
5842   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5843   attributes.set_is_evex_instruction();
5844   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5845   emit_int24(0x09, (0xC0 | encode), (rmode));
5846 }
5847 
5848 void Assembler::vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len) {
5849   assert(VM_Version::supports_evex(), "requires EVEX support");
5850   assert(dst != xnoreg, "sanity");
5851   InstructionMark im(this);
5852   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5853   attributes.set_is_evex_instruction();
5854   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5855   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
5856   emit_int8(0x09);
5857   emit_operand(dst, src);
5858   emit_int8((rmode));
5859 }
5860 
5861 
5862 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
5863   assert(VM_Version::supports_avx(), "");
5864   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5865   attributes.set_rex_vex_w_reverted();
5866   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5867   emit_int16(0x51, (0xC0 | encode));
5868 }
5869 
5870 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
5871   assert(VM_Version::supports_avx(), "");
5872   InstructionMark im(this);
5873   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5874   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5875   attributes.set_rex_vex_w_reverted();
5876   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5877   emit_int8(0x51);
5878   emit_operand(dst, src);
5879 }
5880 
5881 void Assembler::vsqrtps(XMMRegister dst, XMMRegister src, int vector_len) {
5882   assert(VM_Version::supports_avx(), "");
5883   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5884   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5885   emit_int16(0x51, (0xC0 | encode));
5886 }
5887 
5888 void Assembler::vsqrtps(XMMRegister dst, Address src, int vector_len) {
5889   assert(VM_Version::supports_avx(), "");
5890   InstructionMark im(this);
5891   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5892   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5893   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5894   emit_int8(0x51);
5895   emit_operand(dst, src);
5896 }
5897 
5898 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
5899   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5900   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
5901   attributes.set_rex_vex_w_reverted();
5902   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5903   emit_int16(0x54, (0xC0 | encode));
5904 }
5905 
5906 void Assembler::andps(XMMRegister dst, XMMRegister src) {
5907   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5908   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
5909   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5910   emit_int16(0x54, (0xC0 | encode));
5911 }
5912 
5913 void Assembler::andps(XMMRegister dst, Address src) {
5914   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5915   InstructionMark im(this);
5916   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
5917   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5918   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5919   emit_int8(0x54);
5920   emit_operand(dst, src);
5921 }
5922 
5923 void Assembler::andpd(XMMRegister dst, Address src) {
5924   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5925   InstructionMark im(this);
5926   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
5927   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5928   attributes.set_rex_vex_w_reverted();
5929   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5930   emit_int8(0x54);
5931   emit_operand(dst, src);
5932 }
5933 
5934 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5935   assert(VM_Version::supports_avx(), "");
5936   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
5937   attributes.set_rex_vex_w_reverted();
5938   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5939   emit_int16(0x54, (0xC0 | encode));
5940 }
5941 
5942 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5943   assert(VM_Version::supports_avx(), "");
5944   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
5945   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5946   emit_int16(0x54, (0xC0 | encode));
5947 }
5948 
5949 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5950   assert(VM_Version::supports_avx(), "");
5951   InstructionMark im(this);
5952   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
5953   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5954   attributes.set_rex_vex_w_reverted();
5955   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5956   emit_int8(0x54);
5957   emit_operand(dst, src);
5958 }
5959 
5960 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5961   assert(VM_Version::supports_avx(), "");
5962   InstructionMark im(this);
5963   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
5964   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5965   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5966   emit_int8(0x54);
5967   emit_operand(dst, src);
5968 }
5969 
5970 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
5971   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5972   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5973   attributes.set_rex_vex_w_reverted();
5974   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16(0x15, (0xC0 | encode));
5977 }
5978 
5979 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
5980   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5981   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
5982   attributes.set_rex_vex_w_reverted();
5983   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5984   emit_int16(0x14, (0xC0 | encode));
5985 }
5986 
5987 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
5988   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5989   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
5990   attributes.set_rex_vex_w_reverted();
5991   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5992   emit_int16(0x57, (0xC0 | encode));
5993 }
5994 
5995 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
5996   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5997   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
5998   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5999   emit_int16(0x57, (0xC0 | encode));
6000 }
6001 
6002 void Assembler::xorpd(XMMRegister dst, Address src) {
6003   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6004   InstructionMark im(this);
6005   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
6006   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
6007   attributes.set_rex_vex_w_reverted();
6008   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6009   emit_int8(0x57);
6010   emit_operand(dst, src);
6011 }
6012 
6013 void Assembler::xorps(XMMRegister dst, Address src) {
6014   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6015   InstructionMark im(this);
6016   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
6017   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6018   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6019   emit_int8(0x57);
6020   emit_operand(dst, src);
6021 }
6022 
6023 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6024   assert(VM_Version::supports_avx(), "");
6025   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
6026   attributes.set_rex_vex_w_reverted();
6027   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6028   emit_int16(0x57, (0xC0 | encode));
6029 }
6030 
6031 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6032   assert(VM_Version::supports_avx(), "");
6033   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
6034   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6035   emit_int16(0x57, (0xC0 | encode));
6036 }
6037 
6038 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6039   assert(VM_Version::supports_avx(), "");
6040   InstructionMark im(this);
6041   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
6042   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
6043   attributes.set_rex_vex_w_reverted();
6044   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6045   emit_int8(0x57);
6046   emit_operand(dst, src);
6047 }
6048 
6049 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6050   assert(VM_Version::supports_avx(), "");
6051   InstructionMark im(this);
6052   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
6053   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6054   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6055   emit_int8(0x57);
6056   emit_operand(dst, src);
6057 }
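
// Worked VEX sketch (for illustration): vxorps(xmm0, xmm1, xmm2, AVX_128bit)
// assembles to C5 F0 57 C2 - a two-byte VEX prefix with pp = 00 (the ps
// forms take VEX_SIMD_NONE) and vvvv = ~1 naming the nds register xmm1,
// then opcode 0x57 and ModRM 0xC2 = 0xC0 | (0 << 3) | 2, which is exactly
// the (0xC0 | encode) byte emitted above.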
6058 
6059 // Integer vector arithmetic
6060 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == AVX_128bit)) ||
         VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
6063   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
6064   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6065   emit_int16(0x01, (0xC0 | encode));
6066 }
6067 
6068 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == AVX_128bit)) ||
         VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
6071   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
6072   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6073   emit_int16(0x02, (0xC0 | encode));
6074 }
6075 
6076 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
6077   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6078   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6079   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6080   emit_int16((unsigned char)0xFC, (0xC0 | encode));
6081 }
6082 
6083 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
6084   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6085   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6086   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6087   emit_int16((unsigned char)0xFD, (0xC0 | encode));
6088 }
6089 
6090 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
6091   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6092   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6093   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6094   emit_int16((unsigned char)0xFE, (0xC0 | encode));
6095 }
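
// ModRM sketch (for illustration): paddd(xmm1, xmm2) assembles to
// 66 0F FE CA, where 0xCA = 0xC0 | (1 << 3) | 2 puts dst in the reg field
// and src in the r/m field - the (0xC0 | encode) byte built above.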
6096 
6097 void Assembler::paddd(XMMRegister dst, Address src) {
6098   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6099   InstructionMark im(this);
6100   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6101   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6102   emit_int8((unsigned char)0xFE);
6103   emit_operand(dst, src);
6104 }
6105 
6106 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
6107   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6108   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6109   attributes.set_rex_vex_w_reverted();
6110   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6111   emit_int16((unsigned char)0xD4, (0xC0 | encode));
6112 }
6113 
6114 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
6116   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
6117   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6118   emit_int16(0x01, (0xC0 | encode));
6119 }
6120 
6121 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
6123   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
6124   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6125   emit_int16(0x02, (0xC0 | encode));
6126 }
6127 
6128 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6129   assert(UseAVX > 0, "requires some form of AVX");
6130   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6131   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6132   emit_int16((unsigned char)0xFC, (0xC0 | encode));
6133 }
6134 
6135 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6136   assert(UseAVX > 0, "requires some form of AVX");
6137   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6138   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6139   emit_int16((unsigned char)0xFD, (0xC0 | encode));
6140 }
6141 
6142 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6143   assert(UseAVX > 0, "requires some form of AVX");
6144   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6145   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6146   emit_int16((unsigned char)0xFE, (0xC0 | encode));
6147 }
6148 
6149 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6150   assert(UseAVX > 0, "requires some form of AVX");
6151   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6152   attributes.set_rex_vex_w_reverted();
6153   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6154   emit_int16((unsigned char)0xD4, (0xC0 | encode));
6155 }
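
// Worked example (for illustration): vpaddd(xmm1, xmm2, xmm3, AVX_256bit)
// assembles to C5 ED FE CB: a two-byte VEX prefix with vvvv = ~2 naming
// the extra nds source xmm2, L = 1 selecting 256 bits and pp = 01 standing
// in for the 0x66 prefix, then opcode 0xFE and ModRM 0xCB.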
6156 
6157 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6158   assert(UseAVX > 0, "requires some form of AVX");
6159   InstructionMark im(this);
6160   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6161   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
6162   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6163   emit_int8((unsigned char)0xFC);
6164   emit_operand(dst, src);
6165 }
6166 
6167 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6168   assert(UseAVX > 0, "requires some form of AVX");
6169   InstructionMark im(this);
6170   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6171   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
6172   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6173   emit_int8((unsigned char)0xFD);
6174   emit_operand(dst, src);
6175 }
6176 
6177 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6178   assert(UseAVX > 0, "requires some form of AVX");
6179   InstructionMark im(this);
6180   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6181   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6182   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6183   emit_int8((unsigned char)0xFE);
6184   emit_operand(dst, src);
6185 }
6186 
6187 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6188   assert(UseAVX > 0, "requires some form of AVX");
6189   InstructionMark im(this);
6190   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6191   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
6192   attributes.set_rex_vex_w_reverted();
6193   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6194   emit_int8((unsigned char)0xD4);
6195   emit_operand(dst, src);
6196 }
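
// Tuple note: the byte/word forms above pass EVEX_FVM with EVEX_NObit
// because full-vector-memory operands have no embedded broadcast and scale
// disp8 by the whole vector width, while the dword/qword forms pass EVEX_FV
// plus an explicit element size, since their disp8 scaling (and broadcast
// semantics) depend on the 32- vs 64-bit element width.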
6197 
6198 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
6199   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6200   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6201   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6202   emit_int16((unsigned char)0xF8, (0xC0 | encode));
6203 }
6204 
6205 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
6206   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6207   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6208   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6209   emit_int16((unsigned char)0xF9, (0xC0 | encode));
6210 }
6211 
void Assembler::psubd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6214   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6215   emit_int16((unsigned char)0xFA, (0xC0 | encode));
6216 }
6217 
6218 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
6219   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6220   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6221   attributes.set_rex_vex_w_reverted();
6222   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0xFB, (0xC0 | encode));
6225 }
6226 
6227 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6228   assert(UseAVX > 0, "requires some form of AVX");
6229   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6230   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6231   emit_int16((unsigned char)0xF8, (0xC0 | encode));
6232 }
6233 
6234 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6235   assert(UseAVX > 0, "requires some form of AVX");
6236   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6237   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6238   emit_int16((unsigned char)0xF9, (0xC0 | encode));
6239 }
6240 
6241 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6242   assert(UseAVX > 0, "requires some form of AVX");
6243   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6244   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6245   emit_int16((unsigned char)0xFA, (0xC0 | encode));
6246 }
6247 
6248 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6249   assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6251   attributes.set_rex_vex_w_reverted();
6252   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6253   emit_int16((unsigned char)0xFB, (0xC0 | encode));
6254 }
6255 
6256 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6257   assert(UseAVX > 0, "requires some form of AVX");
6258   InstructionMark im(this);
6259   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6260   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
6261   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6262   emit_int8((unsigned char)0xF8);
6263   emit_operand(dst, src);
6264 }
6265 
6266 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6267   assert(UseAVX > 0, "requires some form of AVX");
6268   InstructionMark im(this);
6269   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6270   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
6271   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6272   emit_int8((unsigned char)0xF9);
6273   emit_operand(dst, src);
6274 }
6275 
6276 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6277   assert(UseAVX > 0, "requires some form of AVX");
6278   InstructionMark im(this);
6279   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6280   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6281   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6282   emit_int8((unsigned char)0xFA);
6283   emit_operand(dst, src);
6284 }
6285 
6286 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6287   assert(UseAVX > 0, "requires some form of AVX");
6288   InstructionMark im(this);
6289   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6290   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
6291   attributes.set_rex_vex_w_reverted();
6292   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6293   emit_int8((unsigned char)0xFB);
6294   emit_operand(dst, src);
6295 }
6296 
6297 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
6298   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6299   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6300   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6301   emit_int16((unsigned char)0xD5, (0xC0 | encode));
6302 }
6303 
6304 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
6305   assert(VM_Version::supports_sse4_1(), "");
6306   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6307   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6308   emit_int16(0x40, (0xC0 | encode));
6309 }
6310 
6311 void Assembler::pmuludq(XMMRegister dst, XMMRegister src) {
6312   assert(VM_Version::supports_sse2(), "");
6313   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6314   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6315   emit_int16((unsigned char)0xF4, (0xC0 | encode));
6316 }
6317 
6318 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6319   assert(UseAVX > 0, "requires some form of AVX");
6320   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6321   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6322   emit_int16((unsigned char)0xD5, (0xC0 | encode));
6323 }
6324 
6325 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6326   assert(UseAVX > 0, "requires some form of AVX");
6327   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6328   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6329   emit_int16(0x40, (0xC0 | encode));
6330 }
6331 
6332 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6333   assert(UseAVX > 2, "requires some form of EVEX");
6334   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
6335   attributes.set_is_evex_instruction();
6336   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6337   emit_int16(0x40, (0xC0 | encode));
6338 }
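
// ISA note: opcode 0x40 under EVEX.W1 is VPMULLQ, an AVX-512DQ instruction
// (hence _legacy_mode_dq and set_is_evex_instruction() above), while the
// same 0F 38 40 opcode with W0 is the 32-bit VPMULLD emitted by vpmulld().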
6339 
6340 void Assembler::vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6341   assert(UseAVX > 0, "requires some form of AVX");
6342   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6343   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6344   emit_int16((unsigned char)0xF4, (0xC0 | encode));
6345 }
6346 
6347 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6348   assert(UseAVX > 0, "requires some form of AVX");
6349   InstructionMark im(this);
6350   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6351   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
6352   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6353   emit_int8((unsigned char)0xD5);
6354   emit_operand(dst, src);
6355 }
6356 
6357 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6358   assert(UseAVX > 0, "requires some form of AVX");
6359   InstructionMark im(this);
6360   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6361   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6362   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6363   emit_int8(0x40);
6364   emit_operand(dst, src);
6365 }
6366 
6367 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6368   assert(UseAVX > 2, "requires some form of EVEX");
6369   InstructionMark im(this);
6370   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
6371   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
6372   attributes.set_is_evex_instruction();
6373   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6374   emit_int8(0x40);
6375   emit_operand(dst, src);
6376 }
6377 
6378 // Min, max
6379 void Assembler::pminsb(XMMRegister dst, XMMRegister src) {
6380   assert(VM_Version::supports_sse4_1(), "");
6381   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6382   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6383   emit_int16(0x38, (0xC0 | encode));
6384 }
6385 
6386 void Assembler::vpminsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6387   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6388         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
6389   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6390   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6391   emit_int16(0x38, (0xC0 | encode));
6392 }
6393 
6394 void Assembler::pminsw(XMMRegister dst, XMMRegister src) {
6395   assert(VM_Version::supports_sse2(), "");
6396   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6397   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6398   emit_int16((unsigned char)0xEA, (0xC0 | encode));
6399 }
6400 
6401 void Assembler::vpminsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6402   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6403         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
6404   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6405   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6406   emit_int16((unsigned char)0xEA, (0xC0 | encode));
6407 }
6408 
6409 void Assembler::pminsd(XMMRegister dst, XMMRegister src) {
6410   assert(VM_Version::supports_sse4_1(), "");
6411   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6412   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6413   emit_int16(0x39, (0xC0 | encode));
6414 }
6415 
6416 void Assembler::vpminsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6417   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6418         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
6419   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6420   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6421   emit_int16(0x39, (0xC0 | encode));
6422 }
6423 
6424 void Assembler::vpminsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6425   assert(UseAVX > 2, "requires AVX512F");
6426   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6427   attributes.set_is_evex_instruction();
6428   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6429   emit_int16(0x39, (0xC0 | encode));
6430 }
6431 
6432 void Assembler::minps(XMMRegister dst, XMMRegister src) {
6433   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6434   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6435   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6436   emit_int16(0x5D, (0xC0 | encode));
6437 }

void Assembler::vminps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6439   assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
6440   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6441   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6442   emit_int16(0x5D, (0xC0 | encode));
6443 }
6444 
6445 void Assembler::minpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6447   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6448   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6449   emit_int16(0x5D, (0xC0 | encode));
6450 }

void Assembler::vminpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6452   assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
6453   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6454   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6455   emit_int16(0x5D, (0xC0 | encode));
6456 }
6457 
6458 void Assembler::pmaxsb(XMMRegister dst, XMMRegister src) {
6459   assert(VM_Version::supports_sse4_1(), "");
6460   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6461   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6462   emit_int16(0x3C, (0xC0 | encode));
6463 }
6464 
6465 void Assembler::vpmaxsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6466   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6467         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
6468   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6469   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6470   emit_int16(0x3C, (0xC0 | encode));
6471 }
6472 
6473 void Assembler::pmaxsw(XMMRegister dst, XMMRegister src) {
6474   assert(VM_Version::supports_sse2(), "");
6475   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6476   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6477   emit_int16((unsigned char)0xEE, (0xC0 | encode));
6478 }
6479 
6480 void Assembler::vpmaxsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6481   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6482         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
6483   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6484   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6485   emit_int16((unsigned char)0xEE, (0xC0 | encode));
6486 }
6487 
6488 void Assembler::pmaxsd(XMMRegister dst, XMMRegister src) {
6489   assert(VM_Version::supports_sse4_1(), "");
6490   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6491   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6492   emit_int16(0x3D, (0xC0 | encode));
6493 }
6494 
6495 void Assembler::vpmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6496   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
6497         (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
6498   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6499   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6500   emit_int16(0x3D, (0xC0 | encode));
6501 }
6502 
6503 void Assembler::vpmaxsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6504   assert(UseAVX > 2, "requires AVX512F");
6505   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6506   attributes.set_is_evex_instruction();
6507   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6508   emit_int16(0x3D, (0xC0 | encode));
6509 }
6510 
6511 void Assembler::maxps(XMMRegister dst, XMMRegister src) {
6512   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6513   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6514   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6515   emit_int16(0x5F, (0xC0 | encode));
6516 }
6517 
6518 void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6519   assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
6520   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6521   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6522   emit_int16(0x5F, (0xC0 | encode));
6523 }
6524 
6525 void Assembler::maxpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6527   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6529   emit_int16(0x5F, (0xC0 | encode));
6530 }
6531 
6532 void Assembler::vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6533   assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6535   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6536   emit_int16(0x5F, (0xC0 | encode));
6537 }
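
// Semantics note: like the hardware instructions they wrap, the
// (v)minps/(v)minpd/(v)maxps/(v)maxpd forms above are not commutative -
// when either input is NaN, or the inputs are zeros of opposite sign, the
// second (src) operand is returned, so operand order matters to callers.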
6538 
6539 // Shift packed integers left by specified number of bits.
6540 void Assembler::psllw(XMMRegister dst, int shift) {
6541   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6542   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6543   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
6544   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6545   emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
6546 }
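
// Encoding sketch (for illustration): psllw(xmm1, 3) assembles to
// 66 0F 71 F1 03 - xmm6 above only supplies the /6 opcode-extension value
// in the ModRM reg field, so 0xF1 = 0xC0 | (6 << 3) | 1 with dst in r/m.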
6547 
6548 void Assembler::pslld(XMMRegister dst, int shift) {
6549   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6550   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6551   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
6552   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6553   emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
6554 }
6555 
6556 void Assembler::psllq(XMMRegister dst, int shift) {
6557   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6558   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6559   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
6560   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6561   emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
6562 }
6563 
6564 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
6565   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6566   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6567   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6568   emit_int16((unsigned char)0xF1, (0xC0 | encode));
6569 }
6570 
6571 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
6572   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6573   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6574   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6575   emit_int16((unsigned char)0xF2, (0xC0 | encode));
6576 }
6577 
6578 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
6579   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6580   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6581   attributes.set_rex_vex_w_reverted();
6582   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6583   emit_int16((unsigned char)0xF3, (0xC0 | encode));
6584 }
6585 
6586 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6587   assert(UseAVX > 0, "requires some form of AVX");
6588   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6589   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
6590   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6591   emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
6592 }
6593 
6594 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6595   assert(UseAVX > 0, "requires some form of AVX");
6597   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6598   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
6599   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6600   emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
6601 }
6602 
6603 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6604   assert(UseAVX > 0, "requires some form of AVX");
6605   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6606   attributes.set_rex_vex_w_reverted();
6607   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
6608   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6609   emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
6610 }
6611 
6612 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6613   assert(UseAVX > 0, "requires some form of AVX");
6614   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6615   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6616   emit_int16((unsigned char)0xF1, (0xC0 | encode));
6617 }
6618 
6619 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6620   assert(UseAVX > 0, "requires some form of AVX");
6621   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6622   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6623   emit_int16((unsigned char)0xF2, (0xC0 | encode));
6624 }
6625 
6626 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6627   assert(UseAVX > 0, "requires some form of AVX");
6628   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6629   attributes.set_rex_vex_w_reverted();
6630   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6631   emit_int16((unsigned char)0xF3, (0xC0 | encode));
6632 }
6633 
6634 // Shift packed integers logically right by specified number of bits.
6635 void Assembler::psrlw(XMMRegister dst, int shift) {
6636   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6637   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6638   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
6639   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6640   emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
6641 }
6642 
6643 void Assembler::psrld(XMMRegister dst, int shift) {
6644   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6645   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6646   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
6647   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6648   emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
6649 }
6650 
6651 void Assembler::psrlq(XMMRegister dst, int shift) {
6652   // Do not confuse it with psrldq SSE2 instruction which
6653   // shifts 128 bit value in xmm register by number of bytes.
6654   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6655   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6656   attributes.set_rex_vex_w_reverted();
6657   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
6658   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6659   emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
6660 }
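
// Contrast sketch (for illustration): psrlq(xmm1, 8) emits 66 0F 73 D1 08,
// shifting each 64-bit lane right by 8 bits (/2 in ModRM), whereas psrldq
// shares opcode 0x73 but uses /3 (66 0F 73 D9 08) and moves the whole
// 128-bit register right by 8 bytes.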
6661 
6662 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
6663   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6664   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6665   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6666   emit_int16((unsigned char)0xD1, (0xC0 | encode));
6667 }
6668 
6669 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
6670   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6671   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6672   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6673   emit_int16((unsigned char)0xD2, (0xC0 | encode));
6674 }
6675 
6676 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
6677   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6678   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6679   attributes.set_rex_vex_w_reverted();
6680   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6681   emit_int16((unsigned char)0xD3, (0xC0 | encode));
6682 }
6683 
6684 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6685   assert(UseAVX > 0, "requires some form of AVX");
6686   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6687   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
6688   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6689   emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
6690 }
6691 
6692 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6693   assert(UseAVX > 0, "requires some form of AVX");
6694   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6695   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
6696   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6697   emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
6698 }
6699 
6700 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6701   assert(UseAVX > 0, "requires some form of AVX");
6702   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6703   attributes.set_rex_vex_w_reverted();
6704   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
6705   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6706   emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
6707 }
6708 
6709 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6710   assert(UseAVX > 0, "requires some form of AVX");
6711   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6712   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6713   emit_int16((unsigned char)0xD1, (0xC0 | encode));
6714 }
6715 
6716 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6717   assert(UseAVX > 0, "requires some form of AVX");
6718   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6719   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6720   emit_int16((unsigned char)0xD2, (0xC0 | encode));
6721 }
6722 
6723 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6724   assert(UseAVX > 0, "requires some form of AVX");
6725   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6726   attributes.set_rex_vex_w_reverted();
6727   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6728   emit_int16((unsigned char)0xD3, (0xC0 | encode));
6729 }
6730 
6731 void Assembler::evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6732   assert(VM_Version::supports_avx512bw(), "");
6733   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6734   attributes.set_is_evex_instruction();
6735   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6736   emit_int16(0x10, (0xC0 | encode));
6737 }
6738 
6739 void Assembler::evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6740   assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6742   attributes.set_is_evex_instruction();
6743   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6744   emit_int16(0x12, (0xC0 | encode));
6745 }
6746 
6747 // Shift packed integers arithmetically right by specified number of bits.
6748 void Assembler::psraw(XMMRegister dst, int shift) {
6749   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6750   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6751   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6752   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6753   emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
6754 }
6755 
6756 void Assembler::psrad(XMMRegister dst, int shift) {
6757   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6758   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6759   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
6760   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
6764 }
6765 
6766 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
6767   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6768   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6769   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6770   emit_int16((unsigned char)0xE1, (0xC0 | encode));
6771 }
6772 
6773 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
6774   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6775   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6776   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6777   emit_int16((unsigned char)0xE2, (0xC0 | encode));
6778 }
6779 
6780 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6781   assert(UseAVX > 0, "requires some form of AVX");
6782   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6783   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6784   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6785   emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
6786 }
6787 
6788 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6789   assert(UseAVX > 0, "requires some form of AVX");
6790   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
6792   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6793   emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
6794 }
6795 
6796 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6797   assert(UseAVX > 0, "requires some form of AVX");
6798   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6799   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6800   emit_int16((unsigned char)0xE1, (0xC0 | encode));
6801 }
6802 
6803 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6804   assert(UseAVX > 0, "requires some form of AVX");
6805   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6806   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6807   emit_int16((unsigned char)0xE2, (0xC0 | encode));
6808 }
6809 
6810 void Assembler::evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6811   assert(UseAVX > 2, "requires AVX512");
  assert(VM_Version::supports_avx512vl() || vector_len == AVX_512bit, "requires AVX512VL");
6813   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6814   attributes.set_is_evex_instruction();
6815   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
6817 }
6818 
6819 void Assembler::evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6820   assert(UseAVX > 2, "requires AVX512");
  assert(VM_Version::supports_avx512vl() || vector_len == AVX_512bit, "requires AVX512VL");
6822   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6823   attributes.set_is_evex_instruction();
6824   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6825   emit_int16((unsigned char)0xE2, (0xC0 | encode));
6826 }
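
// ISA note: an arithmetic right shift of quadwords has no SSE or VEX
// encoding - VPSRAQ exists only under EVEX (AVX512F, plus AVX512VL for the
// 128/256-bit forms) - which is why both evpsraq overloads force an EVEX
// encoding and assert accordingly.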
6827 
6828 // logical operations packed integers
6829 void Assembler::pand(XMMRegister dst, XMMRegister src) {
6830   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6831   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6832   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6833   emit_int16((unsigned char)0xDB, (0xC0 | encode));
6834 }
6835 
6836 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6837   assert(UseAVX > 0, "requires some form of AVX");
6838   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6839   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6840   emit_int16((unsigned char)0xDB, (0xC0 | encode));
6841 }
6842 
6843 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6844   assert(UseAVX > 0, "requires some form of AVX");
6845   InstructionMark im(this);
6846   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6847   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6848   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6849   emit_int8((unsigned char)0xDB);
6850   emit_operand(dst, src);
6851 }
6852 
6853 void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6854   assert(VM_Version::supports_evex(), "");
6855   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6856   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6857   emit_int16((unsigned char)0xDB, (0xC0 | encode));
6858 }
6859 
// Variable shift packed integers logically left.
6861 void Assembler::vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6862   assert(UseAVX > 1, "requires AVX2");
6863   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6864   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6865   emit_int16(0x47, (0xC0 | encode));
6866 }
6867 
6868 void Assembler::vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6869   assert(UseAVX > 1, "requires AVX2");
6870   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6871   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6872   emit_int16(0x47, (0xC0 | encode));
6873 }
6874 
// Variable shift packed integers logically right.
6876 void Assembler::vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6877   assert(UseAVX > 1, "requires AVX2");
6878   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6879   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6880   emit_int16(0x45, (0xC0 | encode));
6881 }
6882 
6883 void Assembler::vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6884   assert(UseAVX > 1, "requires AVX2");
6885   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6886   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6887   emit_int16(0x45, (0xC0 | encode));
6888 }
6889 
// Variable shift packed integers arithmetically right.
6891 void Assembler::vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6892   assert(UseAVX > 1, "requires AVX2");
6893   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6894   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6895   emit_int16(0x46, (0xC0 | encode));
6896 }
6897 
6898 void Assembler::evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6899   assert(VM_Version::supports_avx512bw(), "");
6900   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6901   attributes.set_is_evex_instruction();
6902   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6903   emit_int16(0x11, (0xC0 | encode));
6904 }
6905 
6906 void Assembler::evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6907   assert(UseAVX > 2, "requires AVX512");
6908   assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
6909   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6910   attributes.set_is_evex_instruction();
6911   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6912   emit_int16(0x46, (0xC0 | encode));
6913 }
6914 
6915 void Assembler::vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6916   assert(VM_Version::supports_avx512_vbmi2(), "requires vbmi2");
6917   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6918   attributes.set_is_evex_instruction();
6919   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6920   emit_int16(0x71, (0xC0 | encode));
6921 }
6922 
6923 void Assembler::vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6924   assert(VM_Version::supports_avx512_vbmi2(), "requires vbmi2");
6925   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6926   attributes.set_is_evex_instruction();
6927   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6928   emit_int16(0x73, (0xC0 | encode));
6929 }
6930 
6931 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
6932   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6933   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6934   attributes.set_rex_vex_w_reverted();
6935   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6936   emit_int16((unsigned char)0xDF, (0xC0 | encode));
6937 }
6938 
6939 void Assembler::vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6940   assert(UseAVX > 0, "requires some form of AVX");
6941   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6942   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6943   emit_int16((unsigned char)0xDF, (0xC0 | encode));
6944 }
6945 
6946 void Assembler::por(XMMRegister dst, XMMRegister src) {
6947   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6948   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6949   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6950   emit_int16((unsigned char)0xEB, (0xC0 | encode));
6951 }
6952 
6953 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6954   assert(UseAVX > 0, "requires some form of AVX");
6955   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6956   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6957   emit_int16((unsigned char)0xEB, (0xC0 | encode));
6958 }
6959 
6960 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6961   assert(UseAVX > 0, "requires some form of AVX");
6962   InstructionMark im(this);
6963   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6964   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6965   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6966   emit_int8((unsigned char)0xEB);
6967   emit_operand(dst, src);
6968 }
6969 
6970 void Assembler::vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6971   assert(VM_Version::supports_evex(), "");
6972   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6973   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6974   emit_int16((unsigned char)0xEB, (0xC0 | encode));
6975 }
6976 
6977 
6978 void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
6979   assert(VM_Version::supports_evex(), "");
6980   // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
6981   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6982   attributes.set_is_evex_instruction();
6983   attributes.set_embedded_opmask_register_specifier(mask);
6984   if (merge) {
6985     attributes.reset_is_clear_context();
6986   }
6987   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6988   emit_int16((unsigned char)0xEB, (0xC0 | encode));
6989 }
6990 
6991 void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
6992   assert(VM_Version::supports_evex(), "");
6993   // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
6994   InstructionMark im(this);
6995   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6996   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
6997   attributes.set_is_evex_instruction();
6998   attributes.set_embedded_opmask_register_specifier(mask);
6999   if (merge) {
7000     attributes.reset_is_clear_context();
7001   }
7002   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7003   emit_int8((unsigned char)0xEB);
7004   emit_operand(dst, src);
7005 }
7006 
7007 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
7008   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7009   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7010   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7011   emit_int16((unsigned char)0xEF, (0xC0 | encode));
7012 }
7013 
7014 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
7015   assert(UseAVX > 0, "requires some form of AVX");
7016   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7017   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7018   emit_int16((unsigned char)0xEF, (0xC0 | encode));
7019 }
7020 
7021 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
7022   assert(UseAVX > 0, "requires some form of AVX");
7023   InstructionMark im(this);
7024   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7025   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
7026   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7027   emit_int8((unsigned char)0xEF);
7028   emit_operand(dst, src);
7029 }
7030 
7031 void Assembler::vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
7032   assert(UseAVX > 2, "requires some form of EVEX");
7033   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7034   attributes.set_rex_vex_w_reverted();
7035   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7036   emit_int16((unsigned char)0xEF, (0xC0 | encode));
7037 }
7038 
7039 void Assembler::evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
7040   assert(VM_Version::supports_evex(), "");
7041   // Encoding: EVEX.NDS.XXX.66.0F.W0 EF /r
7042   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7043   attributes.set_is_evex_instruction();
7044   attributes.set_embedded_opmask_register_specifier(mask);
7045   if (merge) {
7046     attributes.reset_is_clear_context();
7047   }
7048   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7049   emit_int16((unsigned char)0xEF, (0xC0 | encode));
7050 }
7051 
7052 void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
7053   assert(VM_Version::supports_evex(), "requires EVEX support");
7054   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7055   attributes.set_is_evex_instruction();
7056   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7057   emit_int16((unsigned char)0xEF, (0xC0 | encode));
7058 }
7059 
7060 void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
7061   assert(VM_Version::supports_evex(), "requires EVEX support");
7062   assert(dst != xnoreg, "sanity");
7063   InstructionMark im(this);
7064   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7065   attributes.set_is_evex_instruction();
7066   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
7067   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7068   emit_int8((unsigned char)0xEF);
7069   emit_operand(dst, src);
7070 }
7071 
7072 // vinserti forms
7073 
7074 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7075   assert(VM_Version::supports_avx2(), "");
7076   assert(imm8 <= 0x01, "imm8: %u", imm8);
7077   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7078   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7079   // last byte:
7080   // 0x00 - insert into lower 128 bits
7081   // 0x01 - insert into upper 128 bits
7082   emit_int24(0x38, (0xC0 | encode), imm8 & 0x01);
7083 }
7084 
7085 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7086   assert(VM_Version::supports_avx2(), "");
7087   assert(dst != xnoreg, "sanity");
7088   assert(imm8 <= 0x01, "imm8: %u", imm8);
7089   InstructionMark im(this);
7090   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7091   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7092   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7093   emit_int8(0x38);
7094   emit_operand(dst, src);
7095   // 0x00 - insert into lower 128 bits
7096   // 0x01 - insert into upper 128 bits
7097   emit_int8(imm8 & 0x01);
7098 }
7099 
7100 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7101   assert(VM_Version::supports_evex(), "");
7102   assert(imm8 <= 0x03, "imm8: %u", imm8);
7103   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7104   attributes.set_is_evex_instruction();
7105   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7106   // imm8:
7107   // 0x00 - insert into q0 128 bits (0..127)
7108   // 0x01 - insert into q1 128 bits (128..255)
7109   // 0x02 - insert into q2 128 bits (256..383)
7110   // 0x03 - insert into q3 128 bits (384..511)
7111   emit_int24(0x38, (0xC0 | encode), imm8 & 0x03);
7112 }
7113 
7114 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
7116   assert(dst != xnoreg, "sanity");
7117   assert(imm8 <= 0x03, "imm8: %u", imm8);
7118   InstructionMark im(this);
7119   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7120   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7121   attributes.set_is_evex_instruction();
7122   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x38);
7124   emit_operand(dst, src);
7125   // 0x00 - insert into q0 128 bits (0..127)
7126   // 0x01 - insert into q1 128 bits (128..255)
7127   // 0x02 - insert into q2 128 bits (256..383)
7128   // 0x03 - insert into q3 128 bits (384..511)
7129   emit_int8(imm8 & 0x03);
7130 }
7131 
7132 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7133   assert(VM_Version::supports_evex(), "");
7134   assert(imm8 <= 0x01, "imm8: %u", imm8);
7135   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7136   attributes.set_is_evex_instruction();
7137   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7138   //imm8:
7139   // 0x00 - insert into lower 256 bits
7140   // 0x01 - insert into upper 256 bits
7141   emit_int24(0x3A, (0xC0 | encode), imm8 & 0x01);
7142 }
7143 
7144 
7145 // vinsertf forms
7146 
7147 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7148   assert(VM_Version::supports_avx(), "");
7149   assert(imm8 <= 0x01, "imm8: %u", imm8);
7150   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7151   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7152   // imm8:
7153   // 0x00 - insert into lower 128 bits
7154   // 0x01 - insert into upper 128 bits
7155   emit_int24(0x18, (0xC0 | encode), imm8 & 0x01);
7156 }
7157 
7158 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7159   assert(VM_Version::supports_avx(), "");
7160   assert(dst != xnoreg, "sanity");
7161   assert(imm8 <= 0x01, "imm8: %u", imm8);
7162   InstructionMark im(this);
7163   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7164   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7165   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7166   emit_int8(0x18);
7167   emit_operand(dst, src);
7168   // 0x00 - insert into lower 128 bits
7169   // 0x01 - insert into upper 128 bits
7170   emit_int8(imm8 & 0x01);
7171 }
7172 
7173 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
7175   assert(imm8 <= 0x03, "imm8: %u", imm8);
7176   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7177   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7178   // imm8:
7179   // 0x00 - insert into q0 128 bits (0..127)
7180   // 0x01 - insert into q1 128 bits (128..255)
  // 0x02 - insert into q2 128 bits (256..383)
  // 0x03 - insert into q3 128 bits (384..511)
7183   emit_int24(0x18, (0xC0 | encode), imm8 & 0x03);
7184 }
7185 
7186 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
7188   assert(dst != xnoreg, "sanity");
7189   assert(imm8 <= 0x03, "imm8: %u", imm8);
7190   InstructionMark im(this);
7191   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7192   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7193   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7194   emit_int8(0x18);
7195   emit_operand(dst, src);
7196   // 0x00 - insert into q0 128 bits (0..127)
7197   // 0x01 - insert into q1 128 bits (128..255)
  // 0x02 - insert into q2 128 bits (256..383)
  // 0x03 - insert into q3 128 bits (384..511)
7200   emit_int8(imm8 & 0x03);
7201 }
7202 
7203 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
7204   assert(VM_Version::supports_evex(), "");
7205   assert(imm8 <= 0x01, "imm8: %u", imm8);
7206   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7207   attributes.set_is_evex_instruction();
7208   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7209   // imm8:
7210   // 0x00 - insert into lower 256 bits
7211   // 0x01 - insert into upper 256 bits
7212   emit_int24(0x1A, (0xC0 | encode), imm8 & 0x01);
7213 }
7214 
7215 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
7216   assert(VM_Version::supports_evex(), "");
7217   assert(dst != xnoreg, "sanity");
7218   assert(imm8 <= 0x01, "imm8: %u", imm8);
7219   InstructionMark im(this);
7220   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7221   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
7222   attributes.set_is_evex_instruction();
7223   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7224   emit_int8(0x1A);
7225   emit_operand(dst, src);
7226   // 0x00 - insert into lower 256 bits
7227   // 0x01 - insert into upper 256 bits
7228   emit_int8(imm8 & 0x01);
7229 }
7230 
7231 
7232 // vextracti forms
7233 
7234 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7235   assert(VM_Version::supports_avx2(), "");
7236   assert(imm8 <= 0x01, "imm8: %u", imm8);
7237   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7238   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7239   // imm8:
7240   // 0x00 - extract from lower 128 bits
7241   // 0x01 - extract from upper 128 bits
7242   emit_int24(0x39, (0xC0 | encode), imm8 & 0x01);
7243 }
7244 
7245 void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
7246   assert(VM_Version::supports_avx2(), "");
7247   assert(src != xnoreg, "sanity");
7248   assert(imm8 <= 0x01, "imm8: %u", imm8);
7249   InstructionMark im(this);
7250   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7251   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7252   attributes.reset_is_clear_context();
7253   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7254   emit_int8(0x39);
7255   emit_operand(src, dst);
7256   // 0x00 - extract from lower 128 bits
7257   // 0x01 - extract from upper 128 bits
7258   emit_int8(imm8 & 0x01);
7259 }
7260 
7261 void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7262   assert(VM_Version::supports_evex(), "");
7263   assert(imm8 <= 0x03, "imm8: %u", imm8);
7264   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7265   attributes.set_is_evex_instruction();
7266   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7267   // imm8:
7268   // 0x00 - extract from bits 127:0
7269   // 0x01 - extract from bits 255:128
7270   // 0x02 - extract from bits 383:256
7271   // 0x03 - extract from bits 511:384
7272   emit_int24(0x39, (0xC0 | encode), imm8 & 0x03);
7273 }
7274 
7275 void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
7276   assert(VM_Version::supports_evex(), "");
7277   assert(src != xnoreg, "sanity");
7278   assert(imm8 <= 0x03, "imm8: %u", imm8);
7279   InstructionMark im(this);
7280   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7281   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7282   attributes.reset_is_clear_context();
7283   attributes.set_is_evex_instruction();
7284   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7285   emit_int8(0x39);
7286   emit_operand(src, dst);
7287   // 0x00 - extract from bits 127:0
7288   // 0x01 - extract from bits 255:128
7289   // 0x02 - extract from bits 383:256
7290   // 0x03 - extract from bits 511:384
7291   emit_int8(imm8 & 0x03);
7292 }
7293 
7294 void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7295   assert(VM_Version::supports_avx512dq(), "");
7296   assert(imm8 <= 0x03, "imm8: %u", imm8);
7297   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7298   attributes.set_is_evex_instruction();
7299   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7300   // imm8:
7301   // 0x00 - extract from bits 127:0
7302   // 0x01 - extract from bits 255:128
7303   // 0x02 - extract from bits 383:256
7304   // 0x03 - extract from bits 511:384
7305   emit_int24(0x39, (0xC0 | encode), imm8 & 0x03);
7306 }
7307 
7308 void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7309   assert(VM_Version::supports_evex(), "");
7310   assert(imm8 <= 0x01, "imm8: %u", imm8);
7311   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7312   attributes.set_is_evex_instruction();
7313   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7314   // imm8:
7315   // 0x00 - extract from lower 256 bits
7316   // 0x01 - extract from upper 256 bits
7317   emit_int24(0x3B, (0xC0 | encode), imm8 & 0x01);
7318 }
7319 
7320 void Assembler::vextracti64x4(Address dst, XMMRegister src, uint8_t imm8) {
7321   assert(VM_Version::supports_evex(), "");
7322   assert(src != xnoreg, "sanity");
7323   assert(imm8 <= 0x01, "imm8: %u", imm8);
7324   InstructionMark im(this);
7325   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7326   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
7327   attributes.reset_is_clear_context();
7328   attributes.set_is_evex_instruction();
7329   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x3B);
7331   emit_operand(src, dst);
7332   // 0x00 - extract from lower 256 bits
7333   // 0x01 - extract from upper 256 bits
7334   emit_int8(imm8 & 0x01);
7335 }

// vextractf forms
7337 
7338 void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7339   assert(VM_Version::supports_avx(), "");
7340   assert(imm8 <= 0x01, "imm8: %u", imm8);
7341   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7342   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7343   // imm8:
7344   // 0x00 - extract from lower 128 bits
7345   // 0x01 - extract from upper 128 bits
7346   emit_int24(0x19, (0xC0 | encode), imm8 & 0x01);
7347 }
7348 
7349 void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
7350   assert(VM_Version::supports_avx(), "");
7351   assert(src != xnoreg, "sanity");
7352   assert(imm8 <= 0x01, "imm8: %u", imm8);
7353   InstructionMark im(this);
7354   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7355   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7356   attributes.reset_is_clear_context();
7357   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7358   emit_int8(0x19);
7359   emit_operand(src, dst);
7360   // 0x00 - extract from lower 128 bits
7361   // 0x01 - extract from upper 128 bits
7362   emit_int8(imm8 & 0x01);
7363 }
7364 
7365 void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7366   assert(VM_Version::supports_evex(), "");
7367   assert(imm8 <= 0x03, "imm8: %u", imm8);
7368   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7369   attributes.set_is_evex_instruction();
7370   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7371   // imm8:
7372   // 0x00 - extract from bits 127:0
7373   // 0x01 - extract from bits 255:128
7374   // 0x02 - extract from bits 383:256
7375   // 0x03 - extract from bits 511:384
7376   emit_int24(0x19, (0xC0 | encode), imm8 & 0x03);
7377 }
7378 
7379 void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
7380   assert(VM_Version::supports_evex(), "");
7381   assert(src != xnoreg, "sanity");
7382   assert(imm8 <= 0x03, "imm8: %u", imm8);
7383   InstructionMark im(this);
7384   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7385   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
7386   attributes.reset_is_clear_context();
7387   attributes.set_is_evex_instruction();
7388   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7389   emit_int8(0x19);
7390   emit_operand(src, dst);
7391   // 0x00 - extract from bits 127:0
7392   // 0x01 - extract from bits 255:128
7393   // 0x02 - extract from bits 383:256
7394   // 0x03 - extract from bits 511:384
7395   emit_int8(imm8 & 0x03);
7396 }
7397 
7398 void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7399   assert(VM_Version::supports_avx512dq(), "");
7400   assert(imm8 <= 0x03, "imm8: %u", imm8);
7401   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7402   attributes.set_is_evex_instruction();
7403   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7404   // imm8:
7405   // 0x00 - extract from bits 127:0
7406   // 0x01 - extract from bits 255:128
7407   // 0x02 - extract from bits 383:256
7408   // 0x03 - extract from bits 511:384
7409   emit_int24(0x19, (0xC0 | encode), imm8 & 0x03);
7410 }
7411 
7412 void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
7413   assert(VM_Version::supports_evex(), "");
7414   assert(imm8 <= 0x01, "imm8: %u", imm8);
7415   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7416   attributes.set_is_evex_instruction();
7417   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7418   // imm8:
7419   // 0x00 - extract from lower 256 bits
7420   // 0x01 - extract from upper 256 bits
7421   emit_int24(0x1B, (0xC0 | encode), imm8 & 0x01);
7422 }
7423 
7424 void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
7425   assert(VM_Version::supports_evex(), "");
7426   assert(src != xnoreg, "sanity");
7427   assert(imm8 <= 0x01, "imm8: %u", imm8);
7428   InstructionMark im(this);
7429   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7430   attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
7431   attributes.reset_is_clear_context();
7432   attributes.set_is_evex_instruction();
7433   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7434   emit_int8(0x1B);
7435   emit_operand(src, dst);
7436   // 0x00 - extract from lower 256 bits
7437   // 0x01 - extract from upper 256 bits
7438   emit_int8(imm8 & 0x01);
7439 }
7440 
7441 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7442 void Assembler::vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
7443   assert(VM_Version::supports_avx2(), "");
7444   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7445   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7446   emit_int16(0x78, (0xC0 | encode));
7447 }
7448 
7449 void Assembler::vpbroadcastb(XMMRegister dst, Address src, int vector_len) {
7450   assert(VM_Version::supports_avx2(), "");
7451   assert(dst != xnoreg, "sanity");
7452   InstructionMark im(this);
7453   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7454   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
7455   // swap src<->dst for encoding
7456   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7457   emit_int8(0x78);
7458   emit_operand(dst, src);
7459 }
7460 
7461 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7462 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
7463   assert(VM_Version::supports_avx2(), "");
7464   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7465   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7466   emit_int16(0x79, (0xC0 | encode));
7467 }
7468 
7469 void Assembler::vpbroadcastw(XMMRegister dst, Address src, int vector_len) {
7470   assert(VM_Version::supports_avx2(), "");
7471   assert(dst != xnoreg, "sanity");
7472   InstructionMark im(this);
7473   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7474   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
7475   // swap src<->dst for encoding
7476   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7477   emit_int8(0x79);
7478   emit_operand(dst, src);
7479 }
7480 
7481 // xmm/mem sourced byte/word/dword/qword replicate
7482 
7483 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
7484 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
7485   assert(UseAVX >= 2, "");
7486   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7487   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7488   emit_int16(0x58, (0xC0 | encode));
7489 }
7490 
7491 void Assembler::vpbroadcastd(XMMRegister dst, Address src, int vector_len) {
7492   assert(VM_Version::supports_avx2(), "");
7493   assert(dst != xnoreg, "sanity");
7494   InstructionMark im(this);
7495   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7496   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7497   // swap src<->dst for encoding
7498   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7499   emit_int8(0x58);
7500   emit_operand(dst, src);
7501 }
7502 
7503 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
7504 void Assembler::vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
7505   assert(VM_Version::supports_avx2(), "");
7506   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7507   attributes.set_rex_vex_w_reverted();
7508   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7509   emit_int16(0x59, (0xC0 | encode));
7510 }
7511 
7512 void Assembler::vpbroadcastq(XMMRegister dst, Address src, int vector_len) {
7513   assert(VM_Version::supports_avx2(), "");
7514   assert(dst != xnoreg, "sanity");
7515   InstructionMark im(this);
7516   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7517   attributes.set_rex_vex_w_reverted();
7518   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
7519   // swap src<->dst for encoding
7520   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7521   emit_int8(0x59);
7522   emit_operand(dst, src);
7523 }
7524 void Assembler::evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len) {
7525   assert(vector_len != Assembler::AVX_128bit, "");
7526   assert(VM_Version::supports_avx512dq(), "");
7527   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7528   attributes.set_rex_vex_w_reverted();
7529   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7530   emit_int16(0x5A, (0xC0 | encode));
7531 }
7532 
7533 void Assembler::evbroadcasti64x2(XMMRegister dst, Address src, int vector_len) {
7534   assert(vector_len != Assembler::AVX_128bit, "");
7535   assert(VM_Version::supports_avx512dq(), "");
7536   assert(dst != xnoreg, "sanity");
7537   InstructionMark im(this);
7538   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7539   attributes.set_rex_vex_w_reverted();
7540   attributes.set_address_attributes(/* tuple_type */ EVEX_T2, /* input_size_in_bits */ EVEX_64bit);
7541   // swap src<->dst for encoding
7542   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7543   emit_int8(0x5A);
7544   emit_operand(dst, src);
7545 }
7546 
7547 // scalar single/double precision replicate
7548 
7549 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
7550 void Assembler::vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
7551   assert(VM_Version::supports_avx2(), "");
7552   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7553   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7554   emit_int16(0x18, (0xC0 | encode));
7555 }
7556 
7557 void Assembler::vbroadcastss(XMMRegister dst, Address src, int vector_len) {
7558   assert(VM_Version::supports_avx(), "");
7559   assert(dst != xnoreg, "sanity");
7560   InstructionMark im(this);
7561   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7562   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7563   // swap src<->dst for encoding
7564   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7565   emit_int8(0x18);
7566   emit_operand(dst, src);
7567 }
7568 
7569 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
7570 void Assembler::vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
7571   assert(VM_Version::supports_avx2(), "");
7572   assert(vector_len == AVX_256bit || vector_len == AVX_512bit, "");
7573   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7574   attributes.set_rex_vex_w_reverted();
7575   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7576   emit_int16(0x19, (0xC0 | encode));
7577 }
7578 
7579 void Assembler::vbroadcastsd(XMMRegister dst, Address src, int vector_len) {
7580   assert(VM_Version::supports_avx(), "");
7581   assert(vector_len == AVX_256bit || vector_len == AVX_512bit, "");
7582   assert(dst != xnoreg, "sanity");
7583   InstructionMark im(this);
7584   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7585   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
7586   attributes.set_rex_vex_w_reverted();
7587   // swap src<->dst for encoding
7588   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7589   emit_int8(0x19);
7590   emit_operand(dst, src);
7591 }
7592 
7593 
7594 // gpr source broadcast forms
7595 
7596 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7597 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
7598   assert(VM_Version::supports_avx512bw(), "");
7599   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7600   attributes.set_is_evex_instruction();
7601   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7602   emit_int16(0x7A, (0xC0 | encode));
7603 }
7604 
7605 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
7606 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
7607   assert(VM_Version::supports_avx512bw(), "");
7608   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
7609   attributes.set_is_evex_instruction();
7610   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7611   emit_int16(0x7B, (0xC0 | encode));
7612 }
7613 
7614 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
7615 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
7616   assert(VM_Version::supports_evex(), "");
7617   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7618   attributes.set_is_evex_instruction();
7619   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7620   emit_int16(0x7C, (0xC0 | encode));
7621 }
7622 
7623 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
7624 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
7625   assert(VM_Version::supports_evex(), "");
7626   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7627   attributes.set_is_evex_instruction();
7628   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7629   emit_int16(0x7C, (0xC0 | encode));
7630 }
7631 
7632 void Assembler::vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
7633   assert(VM_Version::supports_avx2(), "");
7634   assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
7635   assert(dst != xnoreg, "sanity");
7636   assert(src.isxmmindex(),"expected to be xmm index");
7637   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7638   InstructionMark im(this);
7639   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
7640   vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7641   emit_int8((unsigned char)0x90);
7642   emit_operand(dst, src);
7643 }
7644 
7645 void Assembler::vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
7646   assert(VM_Version::supports_avx2(), "");
7647   assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
7648   assert(dst != xnoreg, "sanity");
7649   assert(src.isxmmindex(),"expected to be xmm index");
7650   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7651   InstructionMark im(this);
7652   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
7653   vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7654   emit_int8((unsigned char)0x90);
7655   emit_operand(dst, src);
7656 }
7657 
7658 void Assembler::vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
7659   assert(VM_Version::supports_avx2(), "");
7660   assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
7661   assert(dst != xnoreg, "sanity");
7662   assert(src.isxmmindex(),"expected to be xmm index");
7663   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7664   InstructionMark im(this);
7665   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
7666   vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7667   emit_int8((unsigned char)0x92);
7668   emit_operand(dst, src);
7669 }
7670 
7671 void Assembler::vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
7672   assert(VM_Version::supports_avx2(), "");
7673   assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
7674   assert(dst != xnoreg, "sanity");
7675   assert(src.isxmmindex(),"expected to be xmm index");
7676   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7677   InstructionMark im(this);
7678   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
7679   vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7680   emit_int8((unsigned char)0x92);
7681   emit_operand(dst, src);
7682 }
7683 void Assembler::evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len) {
7684   assert(VM_Version::supports_evex(), "");
7685   assert(dst != xnoreg, "sanity");
7686   assert(src.isxmmindex(),"expected to be xmm index");
7687   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7688   assert(mask != k0, "instruction will #UD if mask is in k0");
7689   InstructionMark im(this);
7690   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7691   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7692   attributes.reset_is_clear_context();
7693   attributes.set_embedded_opmask_register_specifier(mask);
7694   attributes.set_is_evex_instruction();
7695   // swap src<->dst for encoding
7696   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7697   emit_int8((unsigned char)0x90);
7698   emit_operand(dst, src);
7699 }
7700 
7701 void Assembler::evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len) {
7702   assert(VM_Version::supports_evex(), "");
7703   assert(dst != xnoreg, "sanity");
7704   assert(src.isxmmindex(),"expected to be xmm index");
7705   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7706   assert(mask != k0, "instruction will #UD if mask is in k0");
7707   InstructionMark im(this);
7708   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
7710   attributes.reset_is_clear_context();
7711   attributes.set_embedded_opmask_register_specifier(mask);
7712   attributes.set_is_evex_instruction();
7713   // swap src<->dst for encoding
7714   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7715   emit_int8((unsigned char)0x90);
7716   emit_operand(dst, src);
7717 }
7718 
7719 void Assembler::evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len) {
7720   assert(VM_Version::supports_evex(), "");
7721   assert(dst != xnoreg, "sanity");
7722   assert(src.isxmmindex(),"expected to be xmm index");
7723   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7724   assert(mask != k0, "instruction will #UD if mask is in k0");
7725   InstructionMark im(this);
7726   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
7728   attributes.reset_is_clear_context();
7729   attributes.set_embedded_opmask_register_specifier(mask);
7730   attributes.set_is_evex_instruction();
7731   // swap src<->dst for encoding
7732   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7733   emit_int8((unsigned char)0x92);
7734   emit_operand(dst, src);
7735 }
7736 
7737 void Assembler::evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len) {
7738   assert(VM_Version::supports_evex(), "");
7739   assert(dst != xnoreg, "sanity");
7740   assert(src.isxmmindex(),"expected to be xmm index");
7741   assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
7742   assert(mask != k0, "instruction will #UD if mask is in k0");
7743   InstructionMark im(this);
7744   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7745   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7746   attributes.reset_is_clear_context();
7747   attributes.set_embedded_opmask_register_specifier(mask);
7748   attributes.set_is_evex_instruction();
7749   // swap src<->dst for encoding
7750   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7751   emit_int8((unsigned char)0x92);
7752   emit_operand(dst, src);
7753 }
7754 
7755 void Assembler::evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len) {
7756   assert(VM_Version::supports_evex(), "");
7757   assert(mask != k0, "instruction will #UD if mask is in k0");
7758   InstructionMark im(this);
7759   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7760   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7761   attributes.reset_is_clear_context();
7762   attributes.set_embedded_opmask_register_specifier(mask);
7763   attributes.set_is_evex_instruction();
7764   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7765   emit_int8((unsigned char)0xA0);
7766   emit_operand(src, dst);
7767 }
7768 
7769 void Assembler::evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len) {
7770   assert(VM_Version::supports_evex(), "");
7771   assert(mask != k0, "instruction will #UD if mask is in k0");
7772   InstructionMark im(this);
7773   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
7775   attributes.reset_is_clear_context();
7776   attributes.set_embedded_opmask_register_specifier(mask);
7777   attributes.set_is_evex_instruction();
7778   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7779   emit_int8((unsigned char)0xA0);
7780   emit_operand(src, dst);
7781 }
7782 
7783 void Assembler::evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len) {
7784   assert(VM_Version::supports_evex(), "");
7785   assert(mask != k0, "instruction will #UD if mask is in k0");
7786   InstructionMark im(this);
7787   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
7788   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
7789   attributes.reset_is_clear_context();
7790   attributes.set_embedded_opmask_register_specifier(mask);
7791   attributes.set_is_evex_instruction();
7792   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7793   emit_int8((unsigned char)0xA2);
7794   emit_operand(src, dst);
7795 }
7796 
7797 void Assembler::evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len) {
7798   assert(VM_Version::supports_evex(), "");
7799   assert(mask != k0, "instruction will #UD if mask is in k0");
7800   InstructionMark im(this);
7801   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
7803   attributes.reset_is_clear_context();
7804   attributes.set_embedded_opmask_register_specifier(mask);
7805   attributes.set_is_evex_instruction();
7806   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7807   emit_int8((unsigned char)0xA2);
7808   emit_operand(src, dst);
7809 }
7810 // Carry-Less Multiplication Quadword
7811 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
7812   assert(VM_Version::supports_clmul(), "");
7813   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
7814   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7815   emit_int24(0x44, (0xC0 | encode), (unsigned char)mask);
7816 }
7817 
7818 // Carry-Less Multiplication Quadword
7819 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
7820   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
7821   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
7822   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7823   emit_int24(0x44, (0xC0 | encode), (unsigned char)mask);
7824 }
7825 
7826 void Assembler::evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len) {
7827   assert(VM_Version::supports_avx512_vpclmulqdq(), "Requires vector carryless multiplication support");
7828   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
7829   attributes.set_is_evex_instruction();
7830   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7831   emit_int24(0x44, (0xC0 | encode), (unsigned char)mask);
7832 }
7833 
7834 void Assembler::vzeroupper_uncached() {
7835   if (VM_Version::supports_vzeroupper()) {
7836     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7837     (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
7838     emit_int8(0x77);
7839   }
7840 }
7841 
7842 #ifndef _LP64
7843 // 32bit only pieces of the assembler
7844 
7845 void Assembler::emms() {
7846   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
7847   emit_int16(0x0F, 0x77);
7848 }
7849 
7850 void Assembler::vzeroupper() {
7851   vzeroupper_uncached();
7852 }
7853 
7854 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
7855   // NO PREFIX AS NEVER 64BIT
7856   InstructionMark im(this);
7857   emit_int16((unsigned char)0x81, (0xF8 | src1->encoding()));
7858   emit_data(imm32, rspec, 0);
7859 }
7860 
7861 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
7863   InstructionMark im(this);
7864   emit_int8((unsigned char)0x81);
7865   emit_operand(rdi, src1);
7866   emit_data(imm32, rspec, 0);
7867 }
7868 
// The 64-bit cmpxchg (cmpxchg8b, used on 32-bit platforms) compares the value at adr
// with the contents of rdx:rax and, if they are equal, stores rcx:rbx into adr;
// otherwise, the value at adr is loaded into rdx:rax. The ZF is set if the compared
// values were equal, and cleared otherwise.
7872 void Assembler::cmpxchg8(Address adr) {
7873   InstructionMark im(this);
7874   emit_int16(0x0F, (unsigned char)0xC7);
7875   emit_operand(rcx, adr);
7876 }
7877 
7878 void Assembler::decl(Register dst) {
7879   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_int8(0x48 | dst->encoding());
7881 }
7882 
7883 // 64bit doesn't use the x87
7884 
7885 void Assembler::emit_operand32(Register reg, Address adr) {
7886   assert(reg->encoding() < 8, "no extended registers");
7887   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
7888   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
7889                adr._rspec);
7890 }
7891 
7892 void Assembler::emit_farith(int b1, int b2, int i) {
7893   assert(isByte(b1) && isByte(b2), "wrong opcode");
7894   assert(0 <= i &&  i < 8, "illegal stack offset");
7895   emit_int16(b1, b2 + i);
7896 }
7897 
7898 void Assembler::fabs() {
7899   emit_int16((unsigned char)0xD9, (unsigned char)0xE1);
7900 }
7901 
7902 void Assembler::fadd(int i) {
7903   emit_farith(0xD8, 0xC0, i);
7904 }
7905 
7906 void Assembler::fadd_d(Address src) {
7907   InstructionMark im(this);
7908   emit_int8((unsigned char)0xDC);
7909   emit_operand32(rax, src);
7910 }
7911 
7912 void Assembler::fadd_s(Address src) {
7913   InstructionMark im(this);
7914   emit_int8((unsigned char)0xD8);
7915   emit_operand32(rax, src);
7916 }
7917 
7918 void Assembler::fadda(int i) {
7919   emit_farith(0xDC, 0xC0, i);
7920 }
7921 
7922 void Assembler::faddp(int i) {
7923   emit_farith(0xDE, 0xC0, i);
7924 }
7925 
7926 void Assembler::fchs() {
7927   emit_int16((unsigned char)0xD9, (unsigned char)0xE0);
7928 }
7929 
7930 void Assembler::fcom(int i) {
7931   emit_farith(0xD8, 0xD0, i);
7932 }
7933 
7934 void Assembler::fcomp(int i) {
7935   emit_farith(0xD8, 0xD8, i);
7936 }
7937 
7938 void Assembler::fcomp_d(Address src) {
7939   InstructionMark im(this);
7940   emit_int8((unsigned char)0xDC);
7941   emit_operand32(rbx, src);
7942 }
7943 
7944 void Assembler::fcomp_s(Address src) {
7945   InstructionMark im(this);
7946   emit_int8((unsigned char)0xD8);
7947   emit_operand32(rbx, src);
7948 }
7949 
7950 void Assembler::fcompp() {
7951   emit_int16((unsigned char)0xDE, (unsigned char)0xD9);
7952 }
7953 
7954 void Assembler::fcos() {
7955   emit_int16((unsigned char)0xD9, (unsigned char)0xFF);
7956 }
7957 
7958 void Assembler::fdecstp() {
7959   emit_int16((unsigned char)0xD9, (unsigned char)0xF6);
7960 }
7961 
7962 void Assembler::fdiv(int i) {
7963   emit_farith(0xD8, 0xF0, i);
7964 }
7965 
7966 void Assembler::fdiv_d(Address src) {
7967   InstructionMark im(this);
7968   emit_int8((unsigned char)0xDC);
7969   emit_operand32(rsi, src);
7970 }
7971 
7972 void Assembler::fdiv_s(Address src) {
7973   InstructionMark im(this);
7974   emit_int8((unsigned char)0xD8);
7975   emit_operand32(rsi, src);
7976 }
7977 
7978 void Assembler::fdiva(int i) {
7979   emit_farith(0xDC, 0xF8, i);
7980 }
7981 
7982 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
7983 //       is erroneous for some of the floating-point instructions below.
7984 
7985 void Assembler::fdivp(int i) {
7986   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
7987 }
7988 
7989 void Assembler::fdivr(int i) {
7990   emit_farith(0xD8, 0xF8, i);
7991 }
7992 
7993 void Assembler::fdivr_d(Address src) {
7994   InstructionMark im(this);
7995   emit_int8((unsigned char)0xDC);
7996   emit_operand32(rdi, src);
7997 }
7998 
7999 void Assembler::fdivr_s(Address src) {
8000   InstructionMark im(this);
8001   emit_int8((unsigned char)0xD8);
8002   emit_operand32(rdi, src);
8003 }
8004 
8005 void Assembler::fdivra(int i) {
8006   emit_farith(0xDC, 0xF0, i);
8007 }
8008 
8009 void Assembler::fdivrp(int i) {
8010   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
8011 }
8012 
8013 void Assembler::ffree(int i) {
8014   emit_farith(0xDD, 0xC0, i);
8015 }
8016 
8017 void Assembler::fild_d(Address adr) {
8018   InstructionMark im(this);
8019   emit_int8((unsigned char)0xDF);
8020   emit_operand32(rbp, adr);
8021 }
8022 
8023 void Assembler::fild_s(Address adr) {
8024   InstructionMark im(this);
8025   emit_int8((unsigned char)0xDB);
8026   emit_operand32(rax, adr);
8027 }
8028 
8029 void Assembler::fincstp() {
8030   emit_int16((unsigned char)0xD9, (unsigned char)0xF7);
8031 }
8032 
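// The leading 0x9B is an FWAIT, making this the synchronizing FINIT form
// rather than FNINIT.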
8033 void Assembler::finit() {
8034   emit_int24((unsigned char)0x9B, (unsigned char)0xDB, (unsigned char)0xE3);
8035 }
8036 
8037 void Assembler::fist_s(Address adr) {
8038   InstructionMark im(this);
8039   emit_int8((unsigned char)0xDB);
8040   emit_operand32(rdx, adr);
8041 }
8042 
8043 void Assembler::fistp_d(Address adr) {
8044   InstructionMark im(this);
8045   emit_int8((unsigned char)0xDF);
8046   emit_operand32(rdi, adr);
8047 }
8048 
8049 void Assembler::fistp_s(Address adr) {
8050   InstructionMark im(this);
8051   emit_int8((unsigned char)0xDB);
8052   emit_operand32(rbx, adr);
8053 }
8054 
8055 void Assembler::fld1() {
8056   emit_int16((unsigned char)0xD9, (unsigned char)0xE8);
8057 }
8058 
8059 void Assembler::fld_d(Address adr) {
8060   InstructionMark im(this);
8061   emit_int8((unsigned char)0xDD);
8062   emit_operand32(rax, adr);
8063 }
8064 
8065 void Assembler::fld_s(Address adr) {
8066   InstructionMark im(this);
8067   emit_int8((unsigned char)0xD9);
8068   emit_operand32(rax, adr);
8069 }
8070 
8071 
8072 void Assembler::fld_s(int index) {
8073   emit_farith(0xD9, 0xC0, index);
8074 }
8075 
8076 void Assembler::fld_x(Address adr) {
8077   InstructionMark im(this);
8078   emit_int8((unsigned char)0xDB);
8079   emit_operand32(rbp, adr);
8080 }
8081 
8082 void Assembler::fldcw(Address src) {
8083   InstructionMark im(this);
8084   emit_int8((unsigned char)0xD9);
8085   emit_operand32(rbp, src);
8086 }
8087 
8088 void Assembler::fldenv(Address src) {
8089   InstructionMark im(this);
8090   emit_int8((unsigned char)0xD9);
8091   emit_operand32(rsp, src);
8092 }
8093 
8094 void Assembler::fldlg2() {
8095   emit_int16((unsigned char)0xD9, (unsigned char)0xEC);
8096 }
8097 
8098 void Assembler::fldln2() {
8099   emit_int16((unsigned char)0xD9, (unsigned char)0xED);
8100 }
8101 
8102 void Assembler::fldz() {
8103   emit_int16((unsigned char)0xD9, (unsigned char)0xEE);
8104 }
8105 
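// ln(x) = ln(2) * log2(x): push the constant, swap it beneath x, then fyl2x
// computes ST(1) * log2(ST(0)) and pops. flog10 does the same with log10(2).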
8106 void Assembler::flog() {
8107   fldln2();
8108   fxch();
8109   fyl2x();
8110 }
8111 
8112 void Assembler::flog10() {
8113   fldlg2();
8114   fxch();
8115   fyl2x();
8116 }
8117 
8118 void Assembler::fmul(int i) {
8119   emit_farith(0xD8, 0xC8, i);
8120 }
8121 
8122 void Assembler::fmul_d(Address src) {
8123   InstructionMark im(this);
8124   emit_int8((unsigned char)0xDC);
8125   emit_operand32(rcx, src);
8126 }
8127 
8128 void Assembler::fmul_s(Address src) {
8129   InstructionMark im(this);
8130   emit_int8((unsigned char)0xD8);
8131   emit_operand32(rcx, src);
8132 }
8133 
8134 void Assembler::fmula(int i) {
8135   emit_farith(0xDC, 0xC8, i);
8136 }
8137 
8138 void Assembler::fmulp(int i) {
8139   emit_farith(0xDE, 0xC8, i);
8140 }
8141 
8142 void Assembler::fnsave(Address dst) {
8143   InstructionMark im(this);
8144   emit_int8((unsigned char)0xDD);
8145   emit_operand32(rsi, dst);
8146 }
8147 
8148 void Assembler::fnstcw(Address src) {
8149   InstructionMark im(this);
8150   emit_int16((unsigned char)0x9B, (unsigned char)0xD9);
8151   emit_operand32(rdi, src);
8152 }
8153 
8154 void Assembler::fnstsw_ax() {
8155   emit_int16((unsigned char)0xDF, (unsigned char)0xE0);
8156 }
8157 
8158 void Assembler::fprem() {
8159   emit_int16((unsigned char)0xD9, (unsigned char)0xF8);
8160 }
8161 
8162 void Assembler::fprem1() {
8163   emit_int16((unsigned char)0xD9, (unsigned char)0xF5);
8164 }
8165 
8166 void Assembler::frstor(Address src) {
8167   InstructionMark im(this);
8168   emit_int8((unsigned char)0xDD);
8169   emit_operand32(rsp, src);
8170 }
8171 
8172 void Assembler::fsin() {
8173   emit_int16((unsigned char)0xD9, (unsigned char)0xFE);
8174 }
8175 
8176 void Assembler::fsqrt() {
8177   emit_int16((unsigned char)0xD9, (unsigned char)0xFA);
8178 }
8179 
8180 void Assembler::fst_d(Address adr) {
8181   InstructionMark im(this);
8182   emit_int8((unsigned char)0xDD);
8183   emit_operand32(rdx, adr);
8184 }
8185 
8186 void Assembler::fst_s(Address adr) {
8187   InstructionMark im(this);
8188   emit_int8((unsigned char)0xD9);
8189   emit_operand32(rdx, adr);
8190 }
8191 
8192 void Assembler::fstp_d(Address adr) {
8193   InstructionMark im(this);
8194   emit_int8((unsigned char)0xDD);
8195   emit_operand32(rbx, adr);
8196 }
8197 
8198 void Assembler::fstp_d(int index) {
8199   emit_farith(0xDD, 0xD8, index);
8200 }
8201 
8202 void Assembler::fstp_s(Address adr) {
8203   InstructionMark im(this);
8204   emit_int8((unsigned char)0xD9);
8205   emit_operand32(rbx, adr);
8206 }
8207 
8208 void Assembler::fstp_x(Address adr) {
8209   InstructionMark im(this);
8210   emit_int8((unsigned char)0xDB);
8211   emit_operand32(rdi, adr);
8212 }
8213 
8214 void Assembler::fsub(int i) {
8215   emit_farith(0xD8, 0xE0, i);
8216 }
8217 
8218 void Assembler::fsub_d(Address src) {
8219   InstructionMark im(this);
8220   emit_int8((unsigned char)0xDC);
8221   emit_operand32(rsp, src);
8222 }
8223 
8224 void Assembler::fsub_s(Address src) {
8225   InstructionMark im(this);
8226   emit_int8((unsigned char)0xD8);
8227   emit_operand32(rsp, src);
8228 }
8229 
8230 void Assembler::fsuba(int i) {
8231   emit_farith(0xDC, 0xE8, i);
8232 }
8233 
8234 void Assembler::fsubp(int i) {
8235   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
8236 }
8237 
8238 void Assembler::fsubr(int i) {
8239   emit_farith(0xD8, 0xE8, i);
8240 }
8241 
8242 void Assembler::fsubr_d(Address src) {
8243   InstructionMark im(this);
8244   emit_int8((unsigned char)0xDC);
8245   emit_operand32(rbp, src);
8246 }
8247 
8248 void Assembler::fsubr_s(Address src) {
8249   InstructionMark im(this);
8250   emit_int8((unsigned char)0xD8);
8251   emit_operand32(rbp, src);
8252 }
8253 
8254 void Assembler::fsubra(int i) {
8255   emit_farith(0xDC, 0xE0, i);
8256 }
8257 
8258 void Assembler::fsubrp(int i) {
8259   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
8260 }
8261 
8262 void Assembler::ftan() {
8263   emit_int32((unsigned char)0xD9, (unsigned char)0xF2, (unsigned char)0xDD, (unsigned char)0xD8);
8264 }
8265 
8266 void Assembler::ftst() {
8267   emit_int16((unsigned char)0xD9, (unsigned char)0xE4);
8268 }
8269 
8270 void Assembler::fucomi(int i) {
8271   // make sure the instruction is supported (introduced for P6, together with cmov)
8272   guarantee(VM_Version::supports_cmov(), "illegal instruction");
8273   emit_farith(0xDB, 0xE8, i);
8274 }
8275 
8276 void Assembler::fucomip(int i) {
8277   // make sure the instruction is supported (introduced for P6, together with cmov)
8278   guarantee(VM_Version::supports_cmov(), "illegal instruction");
8279   emit_farith(0xDF, 0xE8, i);
8280 }
8281 
8282 void Assembler::fwait() {
8283   emit_int8((unsigned char)0x9B);
8284 }
8285 
8286 void Assembler::fxch(int i) {
8287   emit_farith(0xD9, 0xC8, i);
8288 }
8289 
8290 void Assembler::fyl2x() {
8291   emit_int16((unsigned char)0xD9, (unsigned char)0xF1);
8292 }
8293 
8294 void Assembler::frndint() {
8295   emit_int16((unsigned char)0xD9, (unsigned char)0xFC);
8296 }
8297 
8298 void Assembler::f2xm1() {
8299   emit_int16((unsigned char)0xD9, (unsigned char)0xF0);
8300 }
8301 
8302 void Assembler::fldl2e() {
8303   emit_int16((unsigned char)0xD9, (unsigned char)0xEA);
8304 }
8305 #endif // !_LP64
8306 
8307 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
8308 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
8309 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
8310 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
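// e.g. (VEX_SIMD_F3, VEX_OPCODE_0F) maps back to the legacy escape F3 0F, and
// (VEX_SIMD_66, VEX_OPCODE_0F_38) to 66 0F 38.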
8311 
8312 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
8313 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
8314   if (pre > 0) {
8315     emit_int8(simd_pre[pre]);
8316   }
8317   if (rex_w) {
8318     prefixq(adr, xreg);
8319   } else {
8320     prefix(adr, xreg);
8321   }
8322   if (opc > 0) {
8323     emit_int8(0x0F);
8324     int opc2 = simd_opc[opc];
8325     if (opc2 > 0) {
8326       emit_int8(opc2);
8327     }
8328   }
8329 }
8330 
8331 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
8332   if (pre > 0) {
8333     emit_int8(simd_pre[pre]);
8334   }
8335   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
8336   if (opc > 0) {
8337     emit_int8(0x0F);
8338     int opc2 = simd_opc[opc];
8339     if (opc2 > 0) {
8340       emit_int8(opc2);
8341     }
8342   }
8343   return encode;
8344 }
8345 
8346 
8347 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
8348   int vector_len = _attributes->get_vector_len();
8349   bool vex_w = _attributes->is_rex_vex_w();
8350   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
8351     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
8352     byte1 = (~byte1) & 0xE0;
8353     byte1 |= opc;
8354 
8355     int byte2 = ((~nds_enc) & 0xf) << 3;
8356     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
8357 
8358     emit_int24((unsigned char)VEX_3bytes, byte1, byte2);
8359   } else {
8360     int byte1 = vex_r ? VEX_R : 0;
8361     byte1 = (~byte1) & 0x80;
8362     byte1 |= ((~nds_enc) & 0xf) << 3;
8363     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
8364     emit_int16((unsigned char)VEX_2bytes, byte1);
8365   }
8366 }
8367 
8368 // This is a 4 byte encoding
8369 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
8370   // EVEX 0x62 prefix
8372 
8373   bool vex_w = _attributes->is_rex_vex_w();
8374   int evex_encoding = (vex_w ? VEX_W : 0);
8375   // EVEX.b is not currently used for broadcast of single element or data rounding modes
8376   _attributes->set_evex_encoding(evex_encoding);
8377 
  // P0: byte 2, laid out as RXBR'00mm; the register select bits are stored
  // complemented (hence the ~ below)
8380   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
8381   byte2 = (~byte2) & 0xF0;
8382   // confine opc opcode extensions in mm bits to lower two bits
8383   // of form {0F, 0F_38, 0F_3A}
8384   byte2 |= opc;
8385 
8386   // P1: byte 3 as Wvvvv1pp
8387   int byte3 = ((~nds_enc) & 0xf) << 3;
8388   // p[10] is always 1
8389   byte3 |= EVEX_F;
8390   byte3 |= (vex_w & 1) << 7;
8391   // confine pre opcode extensions in pp bits to lower two bits
8392   // of form {66, F3, F2}
8393   byte3 |= pre;
8394 
8395   // P2: byte 4 as zL'Lbv'aaa
8396   // kregs are implemented in the low 3 bits as aaa
8397   int byte4 = (_attributes->is_no_reg_mask()) ?
8398               0 :
8399               _attributes->get_embedded_opmask_register_specifier();
  // EVEX.V' for extending EVEX.vvvv or VIDX
8401   byte4 |= (evex_v ? 0: EVEX_V);
  // third is EVEX.b for broadcast actions
  byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
  // fourth is EVEX.L'L for vector length: 0 is 128, 1 is 256, 2 is 512; 1024 is not currently supported
8405   byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
8406   // last is EVEX.z for zero/merge actions
8407   if (_attributes->is_no_reg_mask() == false &&
8408       _attributes->get_embedded_opmask_register_specifier() != 0) {
8409     byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
8410   }
8411 
8412   emit_int32(EVEX_4bytes, byte2, byte3, byte4);
8413 }
8414 
8415 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
8416   bool vex_r = (xreg_enc & 8) == 8;
8417   bool vex_b = adr.base_needs_rex();
8418   bool vex_x;
8419   if (adr.isxmmindex()) {
8420     vex_x = adr.xmmindex_needs_rex();
8421   } else {
8422     vex_x = adr.index_needs_rex();
8423   }
8424   set_attributes(attributes);
8425   attributes->set_current_assembler(this);
8426 
  // For an EVEX-capable instruction that is not marked as a pure EVEX instruction,
  // check whether it is allowed in legacy (VEX) mode and whether its operands fit there.
  // Pure EVEX instructions have is_evex_instruction set in their definition.
8430   if (!attributes->is_legacy_mode()) {
8431     if (UseAVX > 2 && !attributes->is_evex_instruction() && !is_managed()) {
8432       if ((attributes->get_vector_len() != AVX_512bit) && (nds_enc < 16) && (xreg_enc < 16)) {
8433           attributes->set_is_legacy_mode();
8434       }
8435     }
8436   }
8437 
8438   if (UseAVX > 2) {
8439     assert(((!attributes->uses_vl()) ||
8440             (attributes->get_vector_len() == AVX_512bit) ||
8441             (!_legacy_mode_vl) ||
8442             (attributes->is_legacy_mode())),"XMM register should be 0-15");
8443     assert(((nds_enc < 16 && xreg_enc < 16) || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
8444   }
8445 
8446   clear_managed();
  if (UseAVX > 2 && !attributes->is_legacy_mode()) {
8449     bool evex_r = (xreg_enc >= 16);
8450     bool evex_v;
8451     // EVEX.V' is set to true when VSIB is used as we may need to use higher order XMM registers (16-31)
8452     if (adr.isxmmindex())  {
8453       evex_v = ((adr._xmmindex->encoding() > 15) ? true : false);
8454     } else {
8455       evex_v = (nds_enc >= 16);
8456     }
8457     attributes->set_is_evex_instruction();
8458     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
8459   } else {
8460     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
8461       attributes->set_rex_vex_w(false);
8462     }
8463     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
8464   }
8465 }
8466 
8467 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
8468   bool vex_r = (dst_enc & 8) == 8;
8469   bool vex_b = (src_enc & 8) == 8;
8470   bool vex_x = false;
8471   set_attributes(attributes);
8472   attributes->set_current_assembler(this);
8473 
  // For an EVEX-capable instruction that is not marked as a pure EVEX instruction,
  // check whether it is allowed in legacy (VEX) mode and whether its operands fit there.
  // Pure EVEX instructions have is_evex_instruction set in their definition.
8477   if (!attributes->is_legacy_mode()) {
8478     if (UseAVX > 2 && !attributes->is_evex_instruction() && !is_managed()) {
8479       if ((!attributes->uses_vl() || (attributes->get_vector_len() != AVX_512bit)) &&
8480           (dst_enc < 16) && (nds_enc < 16) && (src_enc < 16)) {
8481           attributes->set_is_legacy_mode();
8482       }
8483     }
8484   }
8485 
8486   if (UseAVX > 2) {
    // Scalar FP instructions (uses_vl false) may have legacy_mode false.
    // Instructions with uses_vl true are vector instructions:
    //  - vector instructions of AVX_512bit length may have legacy_mode false;
    //  - vector instructions shorter than AVX_512bit may have legacy_mode false only if AVX512VL is supported;
    //  - all others must have legacy_mode true.
8492     assert(((!attributes->uses_vl()) ||
8493             (attributes->get_vector_len() == AVX_512bit) ||
8494             (!_legacy_mode_vl) ||
8495             (attributes->is_legacy_mode())),"XMM register should be 0-15");
    // Instructions with legacy_mode true must have dst, nds and src encodings < 16
8497     assert(((dst_enc < 16 && nds_enc < 16 && src_enc < 16) || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
8498   }
8499 
8500   clear_managed();
  if (UseAVX > 2 && !attributes->is_legacy_mode()) {
8503     bool evex_r = (dst_enc >= 16);
8504     bool evex_v = (nds_enc >= 16);
8505     // can use vex_x as bank extender on rm encoding
8506     vex_x = (src_enc >= 16);
8507     attributes->set_is_evex_instruction();
8508     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
8509   } else {
8510     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
8511       attributes->set_rex_vex_w(false);
8512     }
8513     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
8514   }
8515 
8516   // return modrm byte components for operands
8517   return (((dst_enc & 7) << 3) | (src_enc & 7));
8518 }
8519 
8520 
8521 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
8522                             VexOpcode opc, InstructionAttr *attributes) {
8523   if (UseAVX > 0) {
8524     int xreg_enc = xreg->encoding();
8525     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
8526     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
8527   } else {
8528     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
8529     rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
8530   }
8531 }
8532 
8533 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
8534                                       VexOpcode opc, InstructionAttr *attributes) {
8535   int dst_enc = dst->encoding();
8536   int src_enc = src->encoding();
8537   if (UseAVX > 0) {
8538     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
8539     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
8540   } else {
8541     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
8542     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
8543   }
8544 }
8545 
8546 void Assembler::vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
8547   assert(VM_Version::supports_avx(), "");
8548   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8549   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8550   emit_int16(0x5F, (0xC0 | encode));
8551 }
8552 
8553 void Assembler::vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
8554   assert(VM_Version::supports_avx(), "");
8555   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8556   attributes.set_rex_vex_w_reverted();
8557   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8558   emit_int16(0x5F, (0xC0 | encode));
8559 }
8560 
8561 void Assembler::vminss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
8562   assert(VM_Version::supports_avx(), "");
8563   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8564   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8565   emit_int16(0x5D, (0xC0 | encode));
8566 }
8567 
8568 void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
8569   assert(VM_Version::supports_avx(), "");
8570   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8571   attributes.set_rex_vex_w_reverted();
8572   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8573   emit_int16(0x5D, (0xC0 | encode));
8574 }
8575 
8576 void Assembler::vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
8577   assert(VM_Version::supports_avx(), "");
8578   assert(vector_len <= AVX_256bit, "");
8579   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8580   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8581   emit_int24((unsigned char)0xC2, (0xC0 | encode), (0xF & cop));
8582 }
8583 
8584 void Assembler::vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
8585   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
8586   assert(vector_len <= AVX_256bit, "");
8587   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8588   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8589   int src2_enc = src2->encoding();
8590   emit_int24(0x4B, (0xC0 | encode), (0xF0 & src2_enc << 4));
8591 }
8592 
8593 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
8594   assert(VM_Version::supports_avx2(), "");
8595   assert(vector_len <= AVX_256bit, "");
8596   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8597   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8598   emit_int24(0x02, (0xC0 | encode), (unsigned char)imm8);
8599 }
8600 
8601 void Assembler::vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len) {
8602   assert(VM_Version::supports_avx(), "");
8603   assert(vector_len <= AVX_256bit, "");
8604   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8605   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
8606   emit_int24((unsigned char)0xC2, (0xC0 | encode), (unsigned char)comparison);
8607 }
8608 
8609 void Assembler::evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8610                         ComparisonPredicateFP comparison, int vector_len) {
8611   assert(VM_Version::supports_evex(), "");
8612   // Encoding: EVEX.NDS.XXX.0F.W0 C2 /r ib
8613   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8614   attributes.set_is_evex_instruction();
8615   attributes.set_embedded_opmask_register_specifier(mask);
8616   attributes.reset_is_clear_context();
8617   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
8618   emit_int24((unsigned char)0xC2, (0xC0 | encode), comparison);
8619 }
8620 
8621 void Assembler::evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8622                         ComparisonPredicateFP comparison, int vector_len) {
8623   assert(VM_Version::supports_evex(), "");
8624   // Encoding: EVEX.NDS.XXX.66.0F.W1 C2 /r ib
8625   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8626   attributes.set_is_evex_instruction();
8627   attributes.set_embedded_opmask_register_specifier(mask);
8628   attributes.reset_is_clear_context();
8629   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8630   emit_int24((unsigned char)0xC2, (0xC0 | encode), comparison);
8631 }
8632 
8633 void Assembler::blendvps(XMMRegister dst, XMMRegister src) {
8634   assert(VM_Version::supports_sse4_1(), "");
8635   assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
8636   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8637   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8638   emit_int16(0x14, (0xC0 | encode));
8639 }
8640 
8641 void Assembler::blendvpd(XMMRegister dst, XMMRegister src) {
8642   assert(VM_Version::supports_sse4_1(), "");
8643   assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
8644   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8645   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8646   emit_int16(0x15, (0xC0 | encode));
8647 }
8648 
8649 void Assembler::pblendvb(XMMRegister dst, XMMRegister src) {
8650   assert(VM_Version::supports_sse4_1(), "");
8651   assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
8652   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8653   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8654   emit_int16(0x10, (0xC0 | encode));
8655 }
8656 
8657 void Assembler::vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
8658   assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
8659   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8660   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8661   int src2_enc = src2->encoding();
8662   emit_int24(0x4A, (0xC0 | encode), (0xF0 & src2_enc << 4));
8663 }
8664 
8665 void Assembler::vblendps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
8666   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8667   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8668   emit_int24(0x0C, (0xC0 | encode), imm8);
8669 }
8670 
8671 void Assembler::vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8672   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8673   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8674   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8675   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8676   emit_int16(0x64, (0xC0 | encode));
8677 }
8678 
8679 void Assembler::vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8680   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8681   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8682   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8683   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8684   emit_int16(0x65, (0xC0 | encode));
8685 }
8686 
8687 void Assembler::vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8688   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8689   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8690   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8691   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8692   emit_int16(0x66, (0xC0 | encode));
8693 }
8694 
8695 void Assembler::vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
8696   assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
8697   assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
8698   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8699   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8700   emit_int16(0x37, (0xC0 | encode));
8701 }
8702 
8703 void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8704                         int comparison, int vector_len) {
8705   assert(VM_Version::supports_evex(), "");
8706   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8707   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
8708   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8709   attributes.set_is_evex_instruction();
8710   attributes.set_embedded_opmask_register_specifier(mask);
8711   attributes.reset_is_clear_context();
8712   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8713   emit_int24(0x1F, (0xC0 | encode), comparison);
8714 }
8715 
8716 void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8717                         int comparison, int vector_len) {
8718   assert(VM_Version::supports_evex(), "");
8719   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8720   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
8721   InstructionMark im(this);
8722   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8723   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
8724   attributes.set_is_evex_instruction();
8725   attributes.set_embedded_opmask_register_specifier(mask);
8726   attributes.reset_is_clear_context();
8727   int dst_enc = kdst->encoding();
8728   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8729   emit_int8((unsigned char)0x1F);
8730   emit_operand(as_Register(dst_enc), src);
8731   emit_int8((unsigned char)comparison);
8732 }
8733 
8734 void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8735                         int comparison, int vector_len) {
8736   assert(VM_Version::supports_evex(), "");
8737   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8738   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
8739   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8740   attributes.set_is_evex_instruction();
8741   attributes.set_embedded_opmask_register_specifier(mask);
8742   attributes.reset_is_clear_context();
8743   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8744   emit_int24(0x1F, (0xC0 | encode), comparison);
8745 }
8746 
8747 void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8748                         int comparison, int vector_len) {
8749   assert(VM_Version::supports_evex(), "");
8750   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8751   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
8752   InstructionMark im(this);
8753   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8754   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
8755   attributes.set_is_evex_instruction();
8756   attributes.set_embedded_opmask_register_specifier(mask);
8757   attributes.reset_is_clear_context();
8758   int dst_enc = kdst->encoding();
8759   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8760   emit_int8((unsigned char)0x1F);
8761   emit_operand(as_Register(dst_enc), src);
8762   emit_int8((unsigned char)comparison);
8763 }
8764 
8765 void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8766                         int comparison, int vector_len) {
8767   assert(VM_Version::supports_evex(), "");
8768   assert(VM_Version::supports_avx512bw(), "");
8769   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8770   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
8771   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8772   attributes.set_is_evex_instruction();
8773   attributes.set_embedded_opmask_register_specifier(mask);
8774   attributes.reset_is_clear_context();
8775   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8776   emit_int24(0x3F, (0xC0 | encode), comparison);
8777 }
8778 
8779 void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8780                         int comparison, int vector_len) {
8781   assert(VM_Version::supports_evex(), "");
8782   assert(VM_Version::supports_avx512bw(), "");
8783   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8784   // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
8785   InstructionMark im(this);
8786   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8787   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
8788   attributes.set_is_evex_instruction();
8789   attributes.set_embedded_opmask_register_specifier(mask);
8790   attributes.reset_is_clear_context();
8791   int dst_enc = kdst->encoding();
8792   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8793   emit_int8((unsigned char)0x3F);
8794   emit_operand(as_Register(dst_enc), src);
8795   emit_int8((unsigned char)comparison);
8796 }
8797 
8798 void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
8799                         int comparison, int vector_len) {
8800   assert(VM_Version::supports_evex(), "");
8801   assert(VM_Version::supports_avx512bw(), "");
8802   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8803   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
8804   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8805   attributes.set_is_evex_instruction();
8806   attributes.set_embedded_opmask_register_specifier(mask);
8807   attributes.reset_is_clear_context();
8808   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8809   emit_int24(0x3F, (0xC0 | encode), comparison);
8810 }
8811 
8812 void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
8813                         int comparison, int vector_len) {
8814   assert(VM_Version::supports_evex(), "");
8815   assert(VM_Version::supports_avx512bw(), "");
8816   assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
8817   // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
8818   InstructionMark im(this);
8819   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8820   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
8821   attributes.set_is_evex_instruction();
8822   attributes.set_embedded_opmask_register_specifier(mask);
8823   attributes.reset_is_clear_context();
8824   int dst_enc = kdst->encoding();
8825   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8826   emit_int8((unsigned char)0x3F);
8827   emit_operand(as_Register(dst_enc), src);
8828   emit_int8((unsigned char)comparison);
8829 }
8830 
8831 void Assembler::vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len) {
8832   assert(VM_Version::supports_avx(), "");
8833   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8834   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8835   int mask_enc = mask->encoding();
  emit_int24(0x4C, (0xC0 | encode), (0xF0 & mask_enc << 4));
8837 }
8838 
8839 void Assembler::evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8840   assert(VM_Version::supports_evex(), "");
8841   // Encoding: EVEX.NDS.XXX.66.0F38.W1 65 /r
8842   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8843   attributes.set_is_evex_instruction();
8844   attributes.set_embedded_opmask_register_specifier(mask);
8845   if (merge) {
8846     attributes.reset_is_clear_context();
8847   }
8848   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8849   emit_int16(0x65, (0xC0 | encode));
8850 }
8851 
8852 void Assembler::evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8853   assert(VM_Version::supports_evex(), "");
8854   // Encoding: EVEX.NDS.XXX.66.0F38.W0 65 /r
8855   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8856   attributes.set_is_evex_instruction();
8857   attributes.set_embedded_opmask_register_specifier(mask);
8858   if (merge) {
8859     attributes.reset_is_clear_context();
8860   }
8861   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8862   emit_int16(0x65, (0xC0 | encode));
8863 }
8864 
8865 void Assembler::evpblendmb (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8866   assert(VM_Version::supports_evex(), "");
8867   assert(VM_Version::supports_avx512bw(), "");
8868   // Encoding: EVEX.NDS.512.66.0F38.W0 66 /r
8869   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8870   attributes.set_is_evex_instruction();
8871   attributes.set_embedded_opmask_register_specifier(mask);
8872   if (merge) {
8873     attributes.reset_is_clear_context();
8874   }
8875   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8876   emit_int16(0x66, (0xC0 | encode));
8877 }
8878 
8879 void Assembler::evpblendmw (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8880   assert(VM_Version::supports_evex(), "");
8881   assert(VM_Version::supports_avx512bw(), "");
8882   // Encoding: EVEX.NDS.512.66.0F38.W1 66 /r
8883   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
8884   attributes.set_is_evex_instruction();
8885   attributes.set_embedded_opmask_register_specifier(mask);
8886   if (merge) {
8887     attributes.reset_is_clear_context();
8888   }
8889   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8890   emit_int16(0x66, (0xC0 | encode));
8891 }
8892 
8893 void Assembler::evpblendmd (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8894   assert(VM_Version::supports_evex(), "");
8895   //Encoding: EVEX.NDS.512.66.0F38.W0 64 /r
8896   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8897   attributes.set_is_evex_instruction();
8898   attributes.set_embedded_opmask_register_specifier(mask);
8899   if (merge) {
8900     attributes.reset_is_clear_context();
8901   }
8902   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8903   emit_int16(0x64, (0xC0 | encode));
8904 }
8905 
8906 void Assembler::evpblendmq (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8907   assert(VM_Version::supports_evex(), "");
8908   //Encoding: EVEX.NDS.512.66.0F38.W1 64 /r
8909   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8910   attributes.set_is_evex_instruction();
8911   attributes.set_embedded_opmask_register_specifier(mask);
8912   if (merge) {
8913     attributes.reset_is_clear_context();
8914   }
8915   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8916   emit_int16(0x64, (0xC0 | encode));
8917 }
8918 
8919 void Assembler::shlxl(Register dst, Register src1, Register src2) {
8920   assert(VM_Version::supports_bmi2(), "");
8921   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8922   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8923   emit_int16((unsigned char)0xF7, (0xC0 | encode));
8924 }
8925 
8926 void Assembler::shlxq(Register dst, Register src1, Register src2) {
8927   assert(VM_Version::supports_bmi2(), "");
8928   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
8929   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
8930   emit_int16((unsigned char)0xF7, (0xC0 | encode));
8931 }
8932 
8933 #ifndef _LP64
8934 
8935 void Assembler::incl(Register dst) {
8936   // Don't use it directly. Use MacroAssembler::incrementl() instead.
8937   emit_int8(0x40 | dst->encoding());
8938 }
8939 
8940 void Assembler::lea(Register dst, Address src) {
8941   leal(dst, src);
8942 }
8943 
8944 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
8945   InstructionMark im(this);
8946   emit_int8((unsigned char)0xC7);
8947   emit_operand(rax, dst);
8948   emit_data((int)imm32, rspec, 0);
8949 }
8950 
8951 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
8952   InstructionMark im(this);
8953   int encode = prefix_and_encode(dst->encoding());
8954   emit_int8((0xB8 | encode));
8955   emit_data((int)imm32, rspec, 0);
8956 }
8957 
8958 void Assembler::popa() { // 32bit
8959   emit_int8(0x61);
8960 }
8961 
8962 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
8963   InstructionMark im(this);
8964   emit_int8(0x68);
8965   emit_data(imm32, rspec, 0);
8966 }
8967 
8968 void Assembler::pusha() { // 32bit
8969   emit_int8(0x60);
8970 }
8971 
8972 void Assembler::set_byte_if_not_zero(Register dst) {
8973   emit_int24(0x0F, (unsigned char)0x95, (0xE0 | dst->encoding()));
8974 }
8975 
8976 #else // LP64
8977 
8978 void Assembler::set_byte_if_not_zero(Register dst) {
8979   int enc = prefix_and_encode(dst->encoding(), true);
8980   emit_int24(0x0F, (unsigned char)0x95, (0xE0 | enc));
8981 }
8982 
8983 // 64bit only pieces of the assembler
// These should only be used by 64bit instructions that can use rip-relative
// addressing; they cannot be used by instructions that want an immediate value.
8986 
8987 bool Assembler::reachable(AddressLiteral adr) {
8988   int64_t disp;
8989   relocInfo::relocType relocType = adr.reloc();
8990 
  // relocInfo::none will force a 64bit literal to the code stream. It is likely
  // a placeholder for something that will be patched later, and we need to be
  // certain it will always be reachable.
8994   if (relocType == relocInfo::none) {
8995     return false;
8996   }
8997   if (relocType == relocInfo::internal_word_type) {
8998     // This should be rip relative and easily reachable.
8999     return true;
9000   }
9001   if (relocType == relocInfo::virtual_call_type ||
9002       relocType == relocInfo::opt_virtual_call_type ||
9003       relocType == relocInfo::static_call_type ||
9004       relocType == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // inline cache code is going to have issues.)
9008     return true;
9009   }
9010   if (relocType != relocInfo::external_word_type &&
9011       relocType != relocInfo::poll_return_type &&  // these are really external_word but need special
9012       relocType != relocInfo::poll_type &&         // relocs to identify them
9013       relocType != relocInfo::runtime_call_type ) {
9014     return false;
9015   }
9016 
9017   // Stress the correction code
9018   if (ForceUnreachable) {
    // Must be a runtime_call reloc; see if the target is in the code cache.
    // Forcing targets inside the code cache to be unreachable causes issues
    // with things like inline caches, where the additional instructions
    // are not handled.
9023     if (CodeCache::find_blob(adr._target) == NULL) {
9024       return false;
9025     }
9026   }
  // For external_word_type/runtime_call_type: if the target is reachable both
  // from where we are now (possibly a temp buffer) and from anywhere we might
  // end up in the codeCache, then it is always reachable.
  // This would have to become more pessimistic if we ever save/restore
  // shared code.
9032   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
9033   if (!is_simm32(disp)) return false;
9034   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
9035   if (!is_simm32(disp)) return false;
9036 
9037   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
9038 
9039   // Because rip relative is a disp + address_of_next_instruction and we
9040   // don't know the value of address_of_next_instruction we apply a fudge factor
9041   // to make sure we will be ok no matter the size of the instruction we get placed into.
9042   // We don't have to fudge the checks above here because they are already worst case.
9043 
  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, 4-byte literal
9045   // + 4 because better safe than sorry.
9046   const int fudge = 12 + 4;
9047   if (disp < 0) {
9048     disp -= fudge;
9049   } else {
9050     disp += fudge;
9051   }
9052   return is_simm32(disp);
9053 }
9054 
9055 void Assembler::emit_data64(jlong data,
9056                             relocInfo::relocType rtype,
9057                             int format) {
9058   if (rtype == relocInfo::none) {
9059     emit_int64(data);
9060   } else {
9061     emit_data64(data, Relocation::spec_simple(rtype), format);
9062   }
9063 }
9064 
9065 void Assembler::emit_data64(jlong data,
9066                             RelocationHolder const& rspec,
9067                             int format) {
9068   assert(imm_operand == 0, "default format must be immediate in this file");
9069   assert(imm_operand == format, "must be immediate");
9070   assert(inst_mark() != NULL, "must be inside InstructionMark");
9071   // Do not use AbstractAssembler::relocate, which is not intended for
9072   // embedded words.  Instead, relocate to the enclosing instruction.
9073   code_section()->relocate(inst_mark(), rspec, format);
9074 #ifdef ASSERT
9075   check_relocation(rspec, format);
9076 #endif
9077   emit_int64(data);
9078 }
9079 
9080 void Assembler::prefix(Register reg) {
9081   if (reg->encoding() >= 8) {
9082     prefix(REX_B);
9083   }
9084 }
9085 
9086 void Assembler::prefix(Register dst, Register src, Prefix p) {
9087   if (src->encoding() >= 8) {
9088     p = (Prefix)(p | REX_B);
9089   }
9090   if (dst->encoding() >= 8) {
9091     p = (Prefix)(p | REX_R);
9092   }
9093   if (p != Prefix_EMPTY) {
9094     // do not generate an empty prefix
9095     prefix(p);
9096   }
9097 }
9098 
9099 void Assembler::prefix(Register dst, Address adr, Prefix p) {
9100   if (adr.base_needs_rex()) {
9101     if (adr.index_needs_rex()) {
9102       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
9103     } else {
9104       prefix(REX_B);
9105     }
9106   } else {
9107     if (adr.index_needs_rex()) {
9108       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
9109     }
9110   }
9111   if (dst->encoding() >= 8) {
9112     p = (Prefix)(p | REX_R);
9113   }
9114   if (p != Prefix_EMPTY) {
9115     // do not generate an empty prefix
9116     prefix(p);
9117   }
9118 }
9119 
9120 void Assembler::prefix(Address adr) {
9121   if (adr.base_needs_rex()) {
9122     if (adr.index_needs_rex()) {
9123       prefix(REX_XB);
9124     } else {
9125       prefix(REX_B);
9126     }
9127   } else {
9128     if (adr.index_needs_rex()) {
9129       prefix(REX_X);
9130     }
9131   }
9132 }
9133 
9134 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
9135   if (reg->encoding() < 8) {
9136     if (adr.base_needs_rex()) {
9137       if (adr.index_needs_rex()) {
9138         prefix(REX_XB);
9139       } else {
9140         prefix(REX_B);
9141       }
9142     } else {
9143       if (adr.index_needs_rex()) {
9144         prefix(REX_X);
9145       } else if (byteinst && reg->encoding() >= 4) {
9146         prefix(REX);
9147       }
9148     }
9149   } else {
9150     if (adr.base_needs_rex()) {
9151       if (adr.index_needs_rex()) {
9152         prefix(REX_RXB);
9153       } else {
9154         prefix(REX_RB);
9155       }
9156     } else {
9157       if (adr.index_needs_rex()) {
9158         prefix(REX_RX);
9159       } else {
9160         prefix(REX_R);
9161       }
9162     }
9163   }
9164 }
9165 
9166 void Assembler::prefix(Address adr, XMMRegister reg) {
9167   if (reg->encoding() < 8) {
9168     if (adr.base_needs_rex()) {
9169       if (adr.index_needs_rex()) {
9170         prefix(REX_XB);
9171       } else {
9172         prefix(REX_B);
9173       }
9174     } else {
9175       if (adr.index_needs_rex()) {
9176         prefix(REX_X);
9177       }
9178     }
9179   } else {
9180     if (adr.base_needs_rex()) {
9181       if (adr.index_needs_rex()) {
9182         prefix(REX_RXB);
9183       } else {
9184         prefix(REX_RB);
9185       }
9186     } else {
9187       if (adr.index_needs_rex()) {
9188         prefix(REX_RX);
9189       } else {
9190         prefix(REX_R);
9191       }
9192     }
9193   }
9194 }
9195 
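// Byte-register note: with no REX prefix, encodings 4-7 in a byte-sized
// instruction name AH/CH/DH/BH; the presence of any REX prefix (even a plain
// 0x40) remaps them to SPL/BPL/SIL/DIL. Hence byte instructions on
// encodings >= 4 emit a bare REX even when no extension bit is set.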
9196 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
9197   if (reg_enc >= 8) {
9198     prefix(REX_B);
9199     reg_enc -= 8;
9200   } else if (byteinst && reg_enc >= 4) {
9201     prefix(REX);
9202   }
9203   return reg_enc;
9204 }
9205 
9206 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
9207   if (dst_enc < 8) {
9208     if (src_enc >= 8) {
9209       prefix(REX_B);
9210       src_enc -= 8;
9211     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
9212       prefix(REX);
9213     }
9214   } else {
9215     if (src_enc < 8) {
9216       prefix(REX_R);
9217     } else {
9218       prefix(REX_RB);
9219       src_enc -= 8;
9220     }
9221     dst_enc -= 8;
9222   }
9223   return dst_enc << 3 | src_enc;
9224 }
9225 
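// get_prefixq builds the REX byte arithmetically: starting from REX_W (0x48),
// adding 1 sets REX.B, adding 2 sets REX.X, and adding 4 sets REX.R, matching
// the hardware bit layout. The ASSERT block in the Register variant
// cross-checks this arithmetic against the enumerated Prefix values.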
9226 int8_t Assembler::get_prefixq(Address adr) {
9227   int8_t prfx = get_prefixq(adr, rax);
9228   assert(REX_W <= prfx && prfx <= REX_WXB, "must be");
9229   return prfx;
9230 }
9231 
9232 int8_t Assembler::get_prefixq(Address adr, Register src) {
9233   int8_t prfx = (int8_t)(REX_W +
9234                          ((int)adr.base_needs_rex()) +
9235                          ((int)adr.index_needs_rex() << 1) +
9236                          ((int)(src->encoding() >= 8) << 2));
9237 #ifdef ASSERT
9238   if (src->encoding() < 8) {
9239     if (adr.base_needs_rex()) {
9240       if (adr.index_needs_rex()) {
9241         assert(prfx == REX_WXB, "must be");
9242       } else {
9243         assert(prfx == REX_WB, "must be");
9244       }
9245     } else {
9246       if (adr.index_needs_rex()) {
9247         assert(prfx == REX_WX, "must be");
9248       } else {
9249         assert(prfx == REX_W, "must be");
9250       }
9251     }
9252   } else {
9253     if (adr.base_needs_rex()) {
9254       if (adr.index_needs_rex()) {
9255         assert(prfx == REX_WRXB, "must be");
9256       } else {
9257         assert(prfx == REX_WRB, "must be");
9258       }
9259     } else {
9260       if (adr.index_needs_rex()) {
9261         assert(prfx == REX_WRX, "must be");
9262       } else {
9263         assert(prfx == REX_WR, "must be");
9264       }
9265     }
9266   }
9267 #endif
9268   return prfx;
9269 }
9270 
9271 void Assembler::prefixq(Address adr) {
9272   emit_int8(get_prefixq(adr));
9273 }
9274 
9275 void Assembler::prefixq(Address adr, Register src) {
9276   emit_int8(get_prefixq(adr, src));
9277 }
9278 
9279 void Assembler::prefixq(Address adr, XMMRegister src) {
9280   if (src->encoding() < 8) {
9281     if (adr.base_needs_rex()) {
9282       if (adr.index_needs_rex()) {
9283         prefix(REX_WXB);
9284       } else {
9285         prefix(REX_WB);
9286       }
9287     } else {
9288       if (adr.index_needs_rex()) {
9289         prefix(REX_WX);
9290       } else {
9291         prefix(REX_W);
9292       }
9293     }
9294   } else {
9295     if (adr.base_needs_rex()) {
9296       if (adr.index_needs_rex()) {
9297         prefix(REX_WRXB);
9298       } else {
9299         prefix(REX_WRB);
9300       }
9301     } else {
9302       if (adr.index_needs_rex()) {
9303         prefix(REX_WRX);
9304       } else {
9305         prefix(REX_WR);
9306       }
9307     }
9308   }
9309 }
9310 
9311 int Assembler::prefixq_and_encode(int reg_enc) {
9312   if (reg_enc < 8) {
9313     prefix(REX_W);
9314   } else {
9315     prefix(REX_WB);
9316     reg_enc -= 8;
9317   }
9318   return reg_enc;
9319 }
9320 
9321 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
9322   if (dst_enc < 8) {
9323     if (src_enc < 8) {
9324       prefix(REX_W);
9325     } else {
9326       prefix(REX_WB);
9327       src_enc -= 8;
9328     }
9329   } else {
9330     if (src_enc < 8) {
9331       prefix(REX_WR);
9332     } else {
9333       prefix(REX_WRB);
9334       src_enc -= 8;
9335     }
9336     dst_enc -= 8;
9337   }
9338   return dst_enc << 3 | src_enc;
9339 }
9340 
9341 void Assembler::adcq(Register dst, int32_t imm32) {
9342   (void) prefixq_and_encode(dst->encoding());
9343   emit_arith(0x81, 0xD0, dst, imm32);
9344 }
9345 
9346 void Assembler::adcq(Register dst, Address src) {
9347   InstructionMark im(this);
9348   emit_int16(get_prefixq(src, dst), 0x13);
9349   emit_operand(dst, src);
9350 }
9351 
9352 void Assembler::adcq(Register dst, Register src) {
9353   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9354   emit_arith(0x13, 0xC0, dst, src);
9355 }
9356 
9357 void Assembler::addq(Address dst, int32_t imm32) {
9358   InstructionMark im(this);
9359   prefixq(dst);
9360   emit_arith_operand(0x81, rax, dst, imm32);
9361 }
9362 
9363 void Assembler::addq(Address dst, Register src) {
9364   InstructionMark im(this);
9365   emit_int16(get_prefixq(dst, src), 0x01);
9366   emit_operand(src, dst);
9367 }
9368 
9369 void Assembler::addq(Register dst, int32_t imm32) {
9370   (void) prefixq_and_encode(dst->encoding());
9371   emit_arith(0x81, 0xC0, dst, imm32);
9372 }
9373 
9374 void Assembler::addq(Register dst, Address src) {
9375   InstructionMark im(this);
9376   emit_int16(get_prefixq(src, dst), 0x03);
9377   emit_operand(dst, src);
9378 }
9379 
9380 void Assembler::addq(Register dst, Register src) {
9381   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9382   emit_arith(0x03, 0xC0, dst, src);
9383 }
9384 
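// ADCX and ADOX (both 0F 38 F6, selected by the 66 vs. F3 prefix emitted
// first) are add-with-carry variants that consume and produce only CF and
// only OF respectively, letting two independent carry chains be interleaved
// in multi-precision arithmetic.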
9385 void Assembler::adcxq(Register dst, Register src) {
9386   //assert(VM_Version::supports_adx(), "adx instructions not supported");
9387   emit_int8(0x66);
9388   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9389   emit_int32(0x0F,
9390              0x38,
9391              (unsigned char)0xF6,
9392              (0xC0 | encode));
9393 }
9394 
9395 void Assembler::adoxq(Register dst, Register src) {
9396   //assert(VM_Version::supports_adx(), "adx instructions not supported");
9397   emit_int8((unsigned char)0xF3);
9398   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9399   emit_int32(0x0F,
9400              0x38,
9401              (unsigned char)0xF6,
9402              (0xC0 | encode));
9403 }
9404 
9405 void Assembler::andq(Address dst, int32_t imm32) {
9406   InstructionMark im(this);
9407   emit_int16(get_prefixq(dst), (unsigned char)0x81);
9408   emit_operand(rsp, dst, 4);
9409   emit_int32(imm32);
9410 }
9411 
9412 void Assembler::andq(Register dst, int32_t imm32) {
9413   (void) prefixq_and_encode(dst->encoding());
9414   emit_arith(0x81, 0xE0, dst, imm32);
9415 }
9416 
9417 void Assembler::andq(Register dst, Address src) {
9418   InstructionMark im(this);
9419   emit_int16(get_prefixq(src, dst), 0x23);
9420   emit_operand(dst, src);
9421 }
9422 
9423 void Assembler::andq(Register dst, Register src) {
9424   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9425   emit_arith(0x23, 0xC0, dst, src);
9426 }
9427 
9428 void Assembler::andnq(Register dst, Register src1, Register src2) {
9429   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9430   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9431   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9432   emit_int16((unsigned char)0xF2, (0xC0 | encode));
9433 }
9434 
9435 void Assembler::andnq(Register dst, Register src1, Address src2) {
9436   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9437   InstructionMark im(this);
9438   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9439   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9440   emit_int8((unsigned char)0xF2);
9441   emit_operand(dst, src2);
9442 }
9443 
9444 void Assembler::bsfq(Register dst, Register src) {
9445   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9446   emit_int24(0x0F, (unsigned char)0xBC, (0xC0 | encode));
9447 }
9448 
9449 void Assembler::bsrq(Register dst, Register src) {
9450   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9451   emit_int24(0x0F, (unsigned char)0xBD, (0xC0 | encode));
9452 }
9453 
9454 void Assembler::bswapq(Register reg) {
9455   int encode = prefixq_and_encode(reg->encoding());
9456   emit_int16(0x0F, (0xC8 | encode));
9457 }
9458 
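// The BMI1 instructions below share VEX opcode 0F 38 F3; the ModRM reg field
// selects the operation: /1 = BLSR, /2 = BLSMSK, /3 = BLSI. The rcx, rdx and
// rbx pseudo-operands passed to the vex helpers supply exactly those values.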
9459 void Assembler::blsiq(Register dst, Register src) {
9460   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9461   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9462   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9463   emit_int16((unsigned char)0xF3, (0xC0 | encode));
9464 }
9465 
9466 void Assembler::blsiq(Register dst, Address src) {
9467   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9468   InstructionMark im(this);
9469   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9470   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9471   emit_int8((unsigned char)0xF3);
9472   emit_operand(rbx, src);
9473 }
9474 
9475 void Assembler::blsmskq(Register dst, Register src) {
9476   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9477   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9478   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9479   emit_int16((unsigned char)0xF3, (0xC0 | encode));
9480 }
9481 
9482 void Assembler::blsmskq(Register dst, Address src) {
9483   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9484   InstructionMark im(this);
9485   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9486   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9487   emit_int8((unsigned char)0xF3);
9488   emit_operand(rdx, src);
9489 }
9490 
9491 void Assembler::blsrq(Register dst, Register src) {
9492   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9493   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9494   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9495   emit_int16((unsigned char)0xF3, (0xC0 | encode));
9496 }
9497 
9498 void Assembler::blsrq(Register dst, Address src) {
9499   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
9500   InstructionMark im(this);
9501   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9502   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
9503   emit_int8((unsigned char)0xF3);
9504   emit_operand(rcx, src);
9505 }
9506 
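// REX.W 99 is CQO: sign-extend RAX into RDX:RAX, typically issued before a
// 64-bit IDIV.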
9507 void Assembler::cdqq() {
9508   emit_int16(REX_W, (unsigned char)0x99);
9509 }
9510 
9511 void Assembler::clflush(Address adr) {
9512   assert(VM_Version::supports_clflush(), "should do");
9513   prefix(adr);
9514   emit_int16(0x0F, (unsigned char)0xAE);
9515   emit_operand(rdi, adr);
9516 }
9517 
9518 void Assembler::clflushopt(Address adr) {
9519   assert(VM_Version::supports_clflushopt(), "should do!");
9520   // adr should be base reg only with no index or offset
9521   assert(adr.index() == noreg, "index should be noreg");
9522   assert(adr.scale() == Address::no_scale, "scale should be no_scale");
9523   assert(adr.disp() == 0, "displacement should be 0");
9524   // instruction prefix is 0x66
9525   emit_int8(0x66);
9526   prefix(adr);
9527   // opcode family is 0x0F 0xAE
9528   emit_int16(0x0F, (unsigned char)0xAE);
9529   // extended opcode byte is 7 == rdi
9530   emit_operand(rdi, adr);
9531 }
9532 
9533 void Assembler::clwb(Address adr) {
9534   assert(VM_Version::supports_clwb(), "should do!");
9535   // adr should be base reg only with no index or offset
9536   assert(adr.index() == noreg, "index should be noreg");
9537   assert(adr.scale() == Address::no_scale, "scale should be no_scale");
9538   assert(adr.disp() == 0, "displacement should be 0");
9539   // instruction prefix is 0x66
9540   emit_int8(0x66);
9541   prefix(adr);
9542   // opcode family is 0x0f 0xAE
9543   emit_int16(0x0F, (unsigned char)0xAE);
9544   // extended opcode byte is 6 == rsi
9545   emit_operand(rsi, adr);
9546 }
9547 
9548 void Assembler::cmovq(Condition cc, Register dst, Register src) {
9549   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9550   emit_int24(0x0F, (0x40 | cc), (0xC0 | encode));
9551 }
9552 
9553 void Assembler::cmovq(Condition cc, Register dst, Address src) {
9554   InstructionMark im(this);
9555   emit_int24(get_prefixq(src, dst), 0x0F, (0x40 | cc));
9556   emit_operand(dst, src);
9557 }
9558 
9559 void Assembler::cmpq(Address dst, int32_t imm32) {
9560   InstructionMark im(this);
9561   emit_int16(get_prefixq(dst), (unsigned char)0x81);
9562   emit_operand(rdi, dst, 4);
9563   emit_int32(imm32);
9564 }
9565 
9566 void Assembler::cmpq(Register dst, int32_t imm32) {
9567   (void) prefixq_and_encode(dst->encoding());
9568   emit_arith(0x81, 0xF8, dst, imm32);
9569 }
9570 
void Assembler::cmpq(Address dst, Register src) {
  InstructionMark im(this);
  // 0x39 is CMP r/m64, r64 (computes dst - src); 0x3B here would encode the
  // operands in the reverse order.
  emit_int16(get_prefixq(dst, src), 0x39);
  emit_operand(src, dst);
}
9576 
9577 void Assembler::cmpq(Register dst, Register src) {
9578   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9579   emit_arith(0x3B, 0xC0, dst, src);
9580 }
9581 
9582 void Assembler::cmpq(Register dst, Address src) {
9583   InstructionMark im(this);
9584   emit_int16(get_prefixq(src, dst), 0x3B);
9585   emit_operand(dst, src);
9586 }
9587 
9588 void Assembler::cmpxchgq(Register reg, Address adr) {
9589   InstructionMark im(this);
9590   emit_int24(get_prefixq(adr, reg), 0x0F, (unsigned char)0xB1);
9591   emit_operand(reg, adr);
9592 }
9593 
9594 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
9595   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9596   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9597   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
9598   emit_int16(0x2A, (0xC0 | encode));
9599 }
9600 
9601 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
9602   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9603   InstructionMark im(this);
9604   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9605   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
9606   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
9607   emit_int8(0x2A);
9608   emit_operand(dst, src);
9609 }
9610 
9611 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
9612   NOT_LP64(assert(VM_Version::supports_sse(), ""));
9613   InstructionMark im(this);
9614   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9615   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
9616   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
9617   emit_int8(0x2A);
9618   emit_operand(dst, src);
9619 }
9620 
9621 void Assembler::cvttsd2siq(Register dst, Address src) {
9622   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9623   // F2 REX.W 0F 2C /r
9624   // CVTTSD2SI r64, xmm1/m64
9625   InstructionMark im(this);
9626   emit_int32((unsigned char)0xF2, REX_W, 0x0F, 0x2C);
9627   emit_operand(dst, src);
9628 }
9629 
9630 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
9631   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9632   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9633   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
9634   emit_int16(0x2C, (0xC0 | encode));
9635 }
9636 
9637 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
9638   NOT_LP64(assert(VM_Version::supports_sse(), ""));
9639   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9640   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
9641   emit_int16(0x2C, (0xC0 | encode));
9642 }
9643 
9644 void Assembler::decl(Register dst) {
9645   // Don't use it directly. Use MacroAssembler::decrementl() instead.
9646   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
9647   int encode = prefix_and_encode(dst->encoding());
9648   emit_int16((unsigned char)0xFF, (0xC8 | encode));
9649 }
9650 
9651 void Assembler::decq(Register dst) {
9652   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
9654   int encode = prefixq_and_encode(dst->encoding());
  emit_int16((unsigned char)0xFF, (0xC8 | encode));
9656 }
9657 
9658 void Assembler::decq(Address dst) {
9659   // Don't use it directly. Use MacroAssembler::decrementq() instead.
9660   InstructionMark im(this);
9661   emit_int16(get_prefixq(dst), (unsigned char)0xFF);
9662   emit_operand(rcx, dst);
9663 }
9664 
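// FXSAVE/FXRSTOR and XSAVE/XRSTOR share opcode 0F AE; the ModRM reg field
// selects the operation (/0 = FXSAVE, /1 = FXRSTOR, /4 = XSAVE, /5 = XRSTOR),
// hence the as_Register(n) pseudo-operands below.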
9665 void Assembler::fxrstor(Address src) {
9666   emit_int24(get_prefixq(src), 0x0F, (unsigned char)0xAE);
9667   emit_operand(as_Register(1), src);
9668 }
9669 
9670 void Assembler::xrstor(Address src) {
9671   emit_int24(get_prefixq(src), 0x0F, (unsigned char)0xAE);
9672   emit_operand(as_Register(5), src);
9673 }
9674 
9675 void Assembler::fxsave(Address dst) {
9676   emit_int24(get_prefixq(dst), 0x0F, (unsigned char)0xAE);
9677   emit_operand(as_Register(0), dst);
9678 }
9679 
9680 void Assembler::xsave(Address dst) {
9681   emit_int24(get_prefixq(dst), 0x0F, (unsigned char)0xAE);
9682   emit_operand(as_Register(4), dst);
9683 }
9684 
9685 void Assembler::idivq(Register src) {
9686   int encode = prefixq_and_encode(src->encoding());
9687   emit_int16((unsigned char)0xF7, (0xF8 | encode));
9688 }
9689 
9690 void Assembler::imulq(Register dst, Register src) {
9691   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9692   emit_int24(0x0F, (unsigned char)0xAF, (0xC0 | encode));
9693 }
9694 
9695 void Assembler::imulq(Register dst, Register src, int value) {
9696   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9697   if (is8bit(value)) {
9698     emit_int24(0x6B, (0xC0 | encode), (value & 0xFF));
9699   } else {
9700     emit_int16(0x69, (0xC0 | encode));
9701     emit_int32(value);
9702   }
9703 }
9704 
9705 void Assembler::imulq(Register dst, Address src) {
9706   InstructionMark im(this);
9707   emit_int24(get_prefixq(src, dst), 0x0F, (unsigned char)0xAF);
9708   emit_operand(dst, src);
9709 }
9710 
9711 void Assembler::incl(Register dst) {
9712   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
9714   int encode = prefix_and_encode(dst->encoding());
9715   emit_int16((unsigned char)0xFF, (0xC0 | encode));
9716 }
9717 
9718 void Assembler::incq(Register dst) {
9719   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
9721   int encode = prefixq_and_encode(dst->encoding());
9722   emit_int16((unsigned char)0xFF, (0xC0 | encode));
9723 }
9724 
9725 void Assembler::incq(Address dst) {
9726   // Don't use it directly. Use MacroAssembler::incrementq() instead.
9727   InstructionMark im(this);
9728   emit_int16(get_prefixq(dst), (unsigned char)0xFF);
9729   emit_operand(rax, dst);
9730 }
9731 
9732 void Assembler::lea(Register dst, Address src) {
9733   leaq(dst, src);
9734 }
9735 
9736 void Assembler::leaq(Register dst, Address src) {
9737   InstructionMark im(this);
9738   emit_int16(get_prefixq(src, dst), (unsigned char)0x8D);
9739   emit_operand(dst, src);
9740 }
9741 
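// MOV r64, imm64 uses the B8+rd encoding: the low three bits of the opcode
// byte carry the register, so e.g. mov64(rax, x) emits 48 B8 followed by the
// eight immediate bytes (the "movabs" form).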
9742 void Assembler::mov64(Register dst, int64_t imm64) {
9743   InstructionMark im(this);
9744   int encode = prefixq_and_encode(dst->encoding());
9745   emit_int8(0xB8 | encode);
9746   emit_int64(imm64);
9747 }
9748 
9749 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
9750   InstructionMark im(this);
9751   int encode = prefixq_and_encode(dst->encoding());
9752   emit_int8(0xB8 | encode);
9753   emit_data64(imm64, rspec);
9754 }
9755 
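// The 32-bit immediates in the *_narrow_oop routines below are compressed
// oops; emit_data records them with the narrow_oop_operand format so the
// relocation machinery can find and update the embedded value if the
// referenced object moves.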
9756 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
9757   InstructionMark im(this);
9758   int encode = prefix_and_encode(dst->encoding());
9759   emit_int8(0xB8 | encode);
9760   emit_data((int)imm32, rspec, narrow_oop_operand);
9761 }
9762 
9763 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
9764   InstructionMark im(this);
9765   prefix(dst);
9766   emit_int8((unsigned char)0xC7);
9767   emit_operand(rax, dst, 4);
9768   emit_data((int)imm32, rspec, narrow_oop_operand);
9769 }
9770 
9771 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
9772   InstructionMark im(this);
9773   int encode = prefix_and_encode(src1->encoding());
9774   emit_int16((unsigned char)0x81, (0xF8 | encode));
9775   emit_data((int)imm32, rspec, narrow_oop_operand);
9776 }
9777 
9778 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
9779   InstructionMark im(this);
9780   prefix(src1);
9781   emit_int8((unsigned char)0x81);
  emit_operand(rdi, src1, 4); // 0x81 /7 (rdi) selects CMP
9783   emit_data((int)imm32, rspec, narrow_oop_operand);
9784 }
9785 
9786 void Assembler::lzcntq(Register dst, Register src) {
9787   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
9788   emit_int8((unsigned char)0xF3);
9789   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9790   emit_int24(0x0F, (unsigned char)0xBD, (0xC0 | encode));
9791 }
9792 
9793 void Assembler::movdq(XMMRegister dst, Register src) {
9794   // table D-1 says MMX/SSE2
9795   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9796   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9797   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
9798   emit_int16(0x6E, (0xC0 | encode));
9799 }
9800 
9801 void Assembler::movdq(Register dst, XMMRegister src) {
9802   // table D-1 says MMX/SSE2
9803   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
9804   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
9805   // swap src/dst to get correct prefix
9806   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
9807   emit_int16(0x7E,
9808              (0xC0 | encode));
9809 }
9810 
9811 void Assembler::movq(Register dst, Register src) {
9812   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9813   emit_int16((unsigned char)0x8B,
9814              (0xC0 | encode));
9815 }
9816 
9817 void Assembler::movq(Register dst, Address src) {
9818   InstructionMark im(this);
9819   emit_int16(get_prefixq(src, dst), (unsigned char)0x8B);
9820   emit_operand(dst, src);
9821 }
9822 
9823 void Assembler::movq(Address dst, Register src) {
9824   InstructionMark im(this);
9825   emit_int16(get_prefixq(dst, src), (unsigned char)0x89);
9826   emit_operand(src, dst);
9827 }
9828 
9829 void Assembler::movsbq(Register dst, Address src) {
9830   InstructionMark im(this);
9831   emit_int24(get_prefixq(src, dst),
9832              0x0F,
9833              (unsigned char)0xBE);
9834   emit_operand(dst, src);
9835 }
9836 
9837 void Assembler::movsbq(Register dst, Register src) {
9838   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9839   emit_int24(0x0F, (unsigned char)0xBE, (0xC0 | encode));
9840 }
9841 
9842 void Assembler::movslq(Register dst, int32_t imm32) {
9843   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
9844   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use this until it has been tested at runtime...
9846   ShouldNotReachHere();
9847   InstructionMark im(this);
9848   int encode = prefixq_and_encode(dst->encoding());
9849   emit_int8(0xC7 | encode);
9850   emit_int32(imm32);
9851 }
9852 
9853 void Assembler::movslq(Address dst, int32_t imm32) {
9854   assert(is_simm32(imm32), "lost bits");
9855   InstructionMark im(this);
9856   emit_int16(get_prefixq(dst), (unsigned char)0xC7);
9857   emit_operand(rax, dst, 4);
9858   emit_int32(imm32);
9859 }
9860 
9861 void Assembler::movslq(Register dst, Address src) {
9862   InstructionMark im(this);
9863   emit_int16(get_prefixq(src, dst), 0x63);
9864   emit_operand(dst, src);
9865 }
9866 
9867 void Assembler::movslq(Register dst, Register src) {
9868   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9869   emit_int16(0x63, (0xC0 | encode));
9870 }
9871 
9872 void Assembler::movswq(Register dst, Address src) {
9873   InstructionMark im(this);
9874   emit_int24(get_prefixq(src, dst),
9875              0x0F,
9876              (unsigned char)0xBF);
9877   emit_operand(dst, src);
9878 }
9879 
9880 void Assembler::movswq(Register dst, Register src) {
9881   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9882   emit_int24(0x0F, (unsigned char)0xBF, (0xC0 | encode));
9883 }
9884 
9885 void Assembler::movzbq(Register dst, Address src) {
9886   InstructionMark im(this);
9887   emit_int24(get_prefixq(src, dst),
9888              0x0F,
9889              (unsigned char)0xB6);
9890   emit_operand(dst, src);
9891 }
9892 
9893 void Assembler::movzbq(Register dst, Register src) {
9894   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9895   emit_int24(0x0F, (unsigned char)0xB6, (0xC0 | encode));
9896 }
9897 
9898 void Assembler::movzwq(Register dst, Address src) {
9899   InstructionMark im(this);
9900   emit_int24(get_prefixq(src, dst),
9901              0x0F,
9902              (unsigned char)0xB7);
9903   emit_operand(dst, src);
9904 }
9905 
9906 void Assembler::movzwq(Register dst, Register src) {
9907   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9908   emit_int24(0x0F, (unsigned char)0xB7, (0xC0 | encode));
9909 }
9910 
9911 void Assembler::mulq(Address src) {
9912   InstructionMark im(this);
9913   emit_int16(get_prefixq(src), (unsigned char)0xF7);
9914   emit_operand(rsp, src);
9915 }
9916 
9917 void Assembler::mulq(Register src) {
9918   int encode = prefixq_and_encode(src->encoding());
9919   emit_int16((unsigned char)0xF7, (0xE0 | encode));
9920 }
9921 
9922 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
9923   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
9924   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
9925   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
9926   emit_int16((unsigned char)0xF6, (0xC0 | encode));
9927 }
9928 
9929 void Assembler::negq(Register dst) {
9930   int encode = prefixq_and_encode(dst->encoding());
9931   emit_int16((unsigned char)0xF7, (0xD8 | encode));
9932 }
9933 
9934 void Assembler::notq(Register dst) {
9935   int encode = prefixq_and_encode(dst->encoding());
9936   emit_int16((unsigned char)0xF7, (0xD0 | encode));
9937 }
9938 
9939 void Assembler::btsq(Address dst, int imm8) {
9940   assert(isByte(imm8), "not a byte");
9941   InstructionMark im(this);
9942   emit_int24(get_prefixq(dst),
9943              0x0F,
9944              (unsigned char)0xBA);
9945   emit_operand(rbp /* 5 */, dst, 1);
9946   emit_int8(imm8);
9947 }
9948 
9949 void Assembler::btrq(Address dst, int imm8) {
9950   assert(isByte(imm8), "not a byte");
9951   InstructionMark im(this);
9952   emit_int24(get_prefixq(dst),
9953              0x0F,
9954              (unsigned char)0xBA);
9955   emit_operand(rsi /* 6 */, dst, 1);
9956   emit_int8(imm8);
9957 }
9958 
9959 void Assembler::orq(Address dst, int32_t imm32) {
9960   InstructionMark im(this);
9961   emit_int16(get_prefixq(dst), (unsigned char)0x81);
9962   emit_operand(rcx, dst, 4);
9963   emit_int32(imm32);
9964 }
9965 
9966 void Assembler::orq(Register dst, int32_t imm32) {
9967   (void) prefixq_and_encode(dst->encoding());
9968   emit_arith(0x81, 0xC8, dst, imm32);
9969 }
9970 
9971 void Assembler::orq(Register dst, Address src) {
9972   InstructionMark im(this);
9973   emit_int16(get_prefixq(src, dst), 0x0B);
9974   emit_operand(dst, src);
9975 }
9976 
9977 void Assembler::orq(Register dst, Register src) {
9978   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9979   emit_arith(0x0B, 0xC0, dst, src);
9980 }
9981 
9982 void Assembler::popcntq(Register dst, Address src) {
9983   assert(VM_Version::supports_popcnt(), "must support");
9984   InstructionMark im(this);
9985   emit_int32((unsigned char)0xF3,
9986              get_prefixq(src, dst),
9987              0x0F,
9988              (unsigned char)0xB8);
9989   emit_operand(dst, src);
9990 }
9991 
9992 void Assembler::popcntq(Register dst, Register src) {
9993   assert(VM_Version::supports_popcnt(), "must support");
9994   emit_int8((unsigned char)0xF3);
9995   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9996   emit_int24(0x0F, (unsigned char)0xB8, (0xC0 | encode));
9997 }
9998 
9999 void Assembler::popq(Address dst) {
10000   InstructionMark im(this);
10001   emit_int16(get_prefixq(dst), (unsigned char)0x8F);
10002   emit_operand(rax, dst);
10003 }
10004 
10005 // Precomputable: popa, pusha, vzeroupper
10006 
// The results of these routines are invariant from one invocation to the
// next for the duration of a run. Caching the result on bootstrap and
// copying it out on subsequent invocations can thus be beneficial.
10010 static bool     precomputed = false;
10011 
10012 static u_char* popa_code  = NULL;
10013 static int     popa_len   = 0;
10014 
10015 static u_char* pusha_code = NULL;
10016 static int     pusha_len  = 0;
10017 
10018 static u_char* vzup_code  = NULL;
10019 static int     vzup_len   = 0;
10020 
10021 void Assembler::precompute_instructions() {
10022   assert(!Universe::is_fully_initialized(), "must still be single threaded");
10023   guarantee(!precomputed, "only once");
10024   precomputed = true;
10025   ResourceMark rm;
10026 
10027   // Make a temporary buffer big enough for the routines we're capturing
10028   int size = 256;
10029   char* tmp_code = NEW_RESOURCE_ARRAY(char, size);
10030   CodeBuffer buffer((address)tmp_code, size);
10031   MacroAssembler masm(&buffer);
10032 
10033   address begin_popa  = masm.code_section()->end();
10034   masm.popa_uncached();
10035   address end_popa    = masm.code_section()->end();
10036   masm.pusha_uncached();
10037   address end_pusha   = masm.code_section()->end();
10038   masm.vzeroupper_uncached();
10039   address end_vzup    = masm.code_section()->end();
10040 
10041   // Save the instructions to permanent buffers.
10042   popa_len = (int)(end_popa - begin_popa);
10043   popa_code = NEW_C_HEAP_ARRAY(u_char, popa_len, mtInternal);
10044   memcpy(popa_code, begin_popa, popa_len);
10045 
10046   pusha_len = (int)(end_pusha - end_popa);
10047   pusha_code = NEW_C_HEAP_ARRAY(u_char, pusha_len, mtInternal);
10048   memcpy(pusha_code, end_popa, pusha_len);
10049 
10050   vzup_len = (int)(end_vzup - end_pusha);
10051   if (vzup_len > 0) {
10052     vzup_code = NEW_C_HEAP_ARRAY(u_char, vzup_len, mtInternal);
10053     memcpy(vzup_code, end_pusha, vzup_len);
10054   } else {
10055     vzup_code = pusha_code; // dummy
10056   }
10057 
10058   assert(masm.code()->total_oop_size() == 0 &&
10059          masm.code()->total_metadata_size() == 0 &&
10060          masm.code()->total_relocation_size() == 0,
10061          "pre-computed code can't reference oops, metadata or contain relocations");
10062 }
10063 
10064 static void emit_copy(CodeSection* code_section, u_char* src, int src_len) {
10065   assert(src != NULL, "code to copy must have been pre-computed");
10066   assert(code_section->limit() - code_section->end() > src_len, "code buffer not large enough");
10067   address end = code_section->end();
10068   memcpy(end, src, src_len);
10069   code_section->set_end(end + src_len);
10070 }
10071 
10072 void Assembler::popa() { // 64bit
10073   emit_copy(code_section(), popa_code, popa_len);
10074 }
10075 
10076 void Assembler::popa_uncached() { // 64bit
10077   movq(r15, Address(rsp, 0));
10078   movq(r14, Address(rsp, wordSize));
10079   movq(r13, Address(rsp, 2 * wordSize));
10080   movq(r12, Address(rsp, 3 * wordSize));
10081   movq(r11, Address(rsp, 4 * wordSize));
10082   movq(r10, Address(rsp, 5 * wordSize));
10083   movq(r9,  Address(rsp, 6 * wordSize));
10084   movq(r8,  Address(rsp, 7 * wordSize));
10085   movq(rdi, Address(rsp, 8 * wordSize));
10086   movq(rsi, Address(rsp, 9 * wordSize));
10087   movq(rbp, Address(rsp, 10 * wordSize));
10088   // skip rsp
10089   movq(rbx, Address(rsp, 12 * wordSize));
10090   movq(rdx, Address(rsp, 13 * wordSize));
10091   movq(rcx, Address(rsp, 14 * wordSize));
10092   movq(rax, Address(rsp, 15 * wordSize));
10093 
10094   addq(rsp, 16 * wordSize);
10095 }
10096 
10097 void Assembler::pusha() { // 64bit
10098   emit_copy(code_section(), pusha_code, pusha_len);
10099 }
10100 
10101 void Assembler::pusha_uncached() { // 64bit
  // We have to store the original rsp. The ABI says the 128 bytes below
  // rsp (the red zone) are local scratch, so it is safe to stash it there.
10104   movq(Address(rsp, -5 * wordSize), rsp);
10105 
10106   subq(rsp, 16 * wordSize);
10107 
10108   movq(Address(rsp, 15 * wordSize), rax);
10109   movq(Address(rsp, 14 * wordSize), rcx);
10110   movq(Address(rsp, 13 * wordSize), rdx);
10111   movq(Address(rsp, 12 * wordSize), rbx);
10112   // skip rsp
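  // (the original rsp was stored above at -5*wordSize, which is slot 11 here)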
10113   movq(Address(rsp, 10 * wordSize), rbp);
10114   movq(Address(rsp, 9 * wordSize), rsi);
10115   movq(Address(rsp, 8 * wordSize), rdi);
10116   movq(Address(rsp, 7 * wordSize), r8);
10117   movq(Address(rsp, 6 * wordSize), r9);
10118   movq(Address(rsp, 5 * wordSize), r10);
10119   movq(Address(rsp, 4 * wordSize), r11);
10120   movq(Address(rsp, 3 * wordSize), r12);
10121   movq(Address(rsp, 2 * wordSize), r13);
10122   movq(Address(rsp, wordSize), r14);
10123   movq(Address(rsp, 0), r15);
10124 }
10125 
10126 void Assembler::vzeroupper() {
10127   emit_copy(code_section(), vzup_code, vzup_len);
10128 }
10129 
10130 void Assembler::pushq(Address src) {
10131   InstructionMark im(this);
10132   emit_int16(get_prefixq(src), (unsigned char)0xFF);
10133   emit_operand(rsi, src);
10134 }
10135 
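// The shift/rotate routines below assert isShiftCount(imm8 >> 1): halving
// imm8 lets the 32-bit 0..31 check in isShiftCount() admit the full 64-bit
// shift-count range 0..63.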
10136 void Assembler::rclq(Register dst, int imm8) {
10137   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10138   int encode = prefixq_and_encode(dst->encoding());
10139   if (imm8 == 1) {
10140     emit_int16((unsigned char)0xD1, (0xD0 | encode));
10141   } else {
10142     emit_int24((unsigned char)0xC1, (0xD0 | encode), imm8);
10143   }
10144 }
10145 
10146 void Assembler::rcrq(Register dst, int imm8) {
10147   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10148   int encode = prefixq_and_encode(dst->encoding());
10149   if (imm8 == 1) {
10150     emit_int16((unsigned char)0xD1, (0xD8 | encode));
10151   } else {
10152     emit_int24((unsigned char)0xC1, (0xD8 | encode), imm8);
10153   }
10154 }
10155 
10156 void Assembler::rorq(Register dst, int imm8) {
10157   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10158   int encode = prefixq_and_encode(dst->encoding());
10159   if (imm8 == 1) {
10160     emit_int16((unsigned char)0xD1, (0xC8 | encode));
10161   } else {
    emit_int24((unsigned char)0xC1, (0xC8 | encode), imm8);
10163   }
10164 }
10165 
10166 void Assembler::rorxq(Register dst, Register src, int imm8) {
10167   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
10168   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
10169   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
10170   emit_int24((unsigned char)0xF0, (0xC0 | encode), imm8);
10171 }
10172 
10173 void Assembler::rorxd(Register dst, Register src, int imm8) {
10174   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
10175   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
10176   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
10177   emit_int24((unsigned char)0xF0, (0xC0 | encode), imm8);
10178 }
10179 
10180 void Assembler::sarq(Register dst, int imm8) {
10181   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10182   int encode = prefixq_and_encode(dst->encoding());
10183   if (imm8 == 1) {
10184     emit_int16((unsigned char)0xD1, (0xF8 | encode));
10185   } else {
10186     emit_int24((unsigned char)0xC1, (0xF8 | encode), imm8);
10187   }
10188 }
10189 
10190 void Assembler::sarq(Register dst) {
10191   int encode = prefixq_and_encode(dst->encoding());
10192   emit_int16((unsigned char)0xD3, (0xF8 | encode));
10193 }
10194 
10195 void Assembler::sbbq(Address dst, int32_t imm32) {
10196   InstructionMark im(this);
10197   prefixq(dst);
10198   emit_arith_operand(0x81, rbx, dst, imm32);
10199 }
10200 
10201 void Assembler::sbbq(Register dst, int32_t imm32) {
10202   (void) prefixq_and_encode(dst->encoding());
10203   emit_arith(0x81, 0xD8, dst, imm32);
10204 }
10205 
10206 void Assembler::sbbq(Register dst, Address src) {
10207   InstructionMark im(this);
10208   emit_int16(get_prefixq(src, dst), 0x1B);
10209   emit_operand(dst, src);
10210 }
10211 
10212 void Assembler::sbbq(Register dst, Register src) {
10213   (void) prefixq_and_encode(dst->encoding(), src->encoding());
10214   emit_arith(0x1B, 0xC0, dst, src);
10215 }
10216 
10217 void Assembler::shlq(Register dst, int imm8) {
10218   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10219   int encode = prefixq_and_encode(dst->encoding());
10220   if (imm8 == 1) {
10221     emit_int16((unsigned char)0xD1, (0xE0 | encode));
10222   } else {
10223     emit_int24((unsigned char)0xC1, (0xE0 | encode), imm8);
10224   }
10225 }
10226 
10227 void Assembler::shlq(Register dst) {
10228   int encode = prefixq_and_encode(dst->encoding());
10229   emit_int16((unsigned char)0xD3, (0xE0 | encode));
10230 }
10231 
10232 void Assembler::shrq(Register dst, int imm8) {
10233   assert(isShiftCount(imm8 >> 1), "illegal shift count");
10234   int encode = prefixq_and_encode(dst->encoding());
10235   emit_int24((unsigned char)0xC1, (0xE8 | encode), imm8);
10236 }
10237 
10238 void Assembler::shrq(Register dst) {
10239   int encode = prefixq_and_encode(dst->encoding());
  emit_int16((unsigned char)0xD3, (0xE8 | encode));
10241 }
10242 
10243 void Assembler::subq(Address dst, int32_t imm32) {
10244   InstructionMark im(this);
10245   prefixq(dst);
10246   emit_arith_operand(0x81, rbp, dst, imm32);
10247 }
10248 
10249 void Assembler::subq(Address dst, Register src) {
10250   InstructionMark im(this);
10251   emit_int16(get_prefixq(dst, src), 0x29);
10252   emit_operand(src, dst);
10253 }
10254 
10255 void Assembler::subq(Register dst, int32_t imm32) {
10256   (void) prefixq_and_encode(dst->encoding());
10257   emit_arith(0x81, 0xE8, dst, imm32);
10258 }
10259 
10260 // Force generation of a 4 byte immediate value even if it fits into 8bit
10261 void Assembler::subq_imm32(Register dst, int32_t imm32) {
10262   (void) prefixq_and_encode(dst->encoding());
10263   emit_arith_imm32(0x81, 0xE8, dst, imm32);
10264 }
10265 
10266 void Assembler::subq(Register dst, Address src) {
10267   InstructionMark im(this);
10268   emit_int16(get_prefixq(src, dst), 0x2B);
10269   emit_operand(dst, src);
10270 }
10271 
10272 void Assembler::subq(Register dst, Register src) {
10273   (void) prefixq_and_encode(dst->encoding(), src->encoding());
10274   emit_arith(0x2B, 0xC0, dst, src);
10275 }
10276 
10277 void Assembler::testq(Register dst, int32_t imm32) {
10278   // not using emit_arith because test
10279   // doesn't support sign-extension of
10280   // 8bit operands
10281   int encode = dst->encoding();
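  // rax (encode == 0) has a short form: REX.W A9 imm32, with no ModRM byte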
10282   if (encode == 0) {
10283     emit_int16(REX_W, (unsigned char)0xA9);
10284   } else {
10285     encode = prefixq_and_encode(encode);
10286     emit_int16((unsigned char)0xF7, (0xC0 | encode));
10287   }
10288   emit_int32(imm32);
10289 }
10290 
10291 void Assembler::testq(Register dst, Register src) {
10292   (void) prefixq_and_encode(dst->encoding(), src->encoding());
10293   emit_arith(0x85, 0xC0, dst, src);
10294 }
10295 
10296 void Assembler::testq(Register dst, Address src) {
10297   InstructionMark im(this);
10298   emit_int16(get_prefixq(src, dst), (unsigned char)0x85);
10299   emit_operand(dst, src);
10300 }
10301 
10302 void Assembler::xaddq(Address dst, Register src) {
10303   InstructionMark im(this);
10304   emit_int24(get_prefixq(dst, src), 0x0F, (unsigned char)0xC1);
10305   emit_operand(src, dst);
10306 }
10307 
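// Note: XCHG with a memory operand always asserts the processor's LOCK
// signal, so the memory form below is implicitly atomic.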
10308 void Assembler::xchgq(Register dst, Address src) {
10309   InstructionMark im(this);
10310   emit_int16(get_prefixq(src, dst), (unsigned char)0x87);
10311   emit_operand(dst, src);
10312 }
10313 
10314 void Assembler::xchgq(Register dst, Register src) {
10315   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int16((unsigned char)0x87, (0xC0 | encode));
10317 }
10318 
10319 void Assembler::xorq(Register dst, Register src) {
10320   (void) prefixq_and_encode(dst->encoding(), src->encoding());
10321   emit_arith(0x33, 0xC0, dst, src);
10322 }
10323 
10324 void Assembler::xorq(Register dst, Address src) {
10325   InstructionMark im(this);
10326   emit_int16(get_prefixq(src, dst), 0x33);
10327   emit_operand(dst, src);
10328 }
10329 
10330 #endif // !LP64