/*
 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
// Implementation of AddressLiteral

// A 2-D table for managing compressed displacement (disp8) on EVEX enabled platforms.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
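
// A minimal sketch of how the table is consumed (illustrative only; the real
// lookups live in query_compressed_disp_byte and emit_compressed_disp_byte
// below). The row is the tuple type plus a mod_idx offset derived from
// EVEX.w/EVEX.b, the column is the vector length:
//
//   int N = tuple_table[Assembler::EVEX_FV + 0][Assembler::AVX_512bit];  // 64
//   // A displacement that is a multiple of N may be emitted as disp8 == disp / N.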

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
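
// Example use (illustrative; the target is just a stand-in for any runtime
// entry point):
//
//   AddressLiteral stub(StubRoutines::forward_exception_entry(),
//                       relocInfo::runtime_call_type);
//   // stub now carries a runtime_call_Relocation, so the embedded address
//   // stays patchable when the code is relocated.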

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}
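
// Example (illustrative): an index field of 4 (rsp's encoding) selects the
// no-index form:
//
//   Address a1 = Address::make_raw(rbx->encoding(), rcx->encoding(),
//                                  Address::times_4, 16, relocInfo::none);
//   // -> [rbx + rcx*4 + 16]
//   Address a2 = Address::make_raw(rbx->encoding(), rsp->encoding(),
//                                  Address::times_1, 16, relocInfo::none);
//   // -> [rbx + 16], index converted to noreg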

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_int32(imm32);
  }
}
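
// Encoding example (illustrative): with op1 == 0x81, op2 == 0xC0 (the addl
// forms used below), the short sign-extended encoding is chosen when the
// immediate fits in 8 bits:
//
//   emit_arith(0x81, 0xC0, rbx, 16);    // 83 C3 10           addl rbx, 16
//   emit_arith(0x81, 0xC0, rbx, 4096);  // 81 C3 00 10 00 00  addl rbx, 4096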

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int8(op1);
  emit_int8(op2 | encode(dst) << 3 | encode(src));
}
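
// Encoding example (illustrative): the reg-reg form packs both registers
// into one ModRM byte, [11 dst src]:
//
//   emit_arith(0x03, 0xC0, rax, rbx);   // 03 C3   addl rax, rbx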


bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8bit.
  if (VM_Version::supports_evex() && is_evex_inst) {
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        // rows EVEX_FV(2)/EVEX_FV(3) of tuple_table
        mod_idx = 2 + (((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0);
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if ((-0x80 <= new_disp && new_disp < 0x80)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return (-0x80 <= disp && disp < 0x80);
}


bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8bit.
  if (VM_Version::supports_evex() && _is_evex_instruction) {
    switch (_tuple_type) {
    case EVEX_FV:
      if ((_evex_encoding & VEX_W) == VEX_W) {
        // rows EVEX_FV(2)/EVEX_FV(3) of tuple_table
        mod_idx = 2 + (((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0);
      } else {
        mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (_input_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_input_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (_avx_vector_len >= AVX_128bit && _avx_vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[_tuple_type + mod_idx][_avx_vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return is8bit(disp);
}
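
// Worked example of the disp8*N compression above (illustrative): for an
// EVEX full-vector op on a 512-bit vector, tuple_table[EVEX_FV][AVX_512bit]
// is 64, so
//
//   disp == 128  ->  128 % 64 == 0 and 128 / 64 == 2 is 8bit,
//                    so the instruction is emitted with disp8 == 2;
//   disp == 100  ->  100 % 64 != 0, so a full disp32 must be emitted.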


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
  _is_evex_instruction = false;
}
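
// Encoding example for the [base + index*scale + imm8] path above
// (illustrative): with reg == rax, base == rcx, index == rdx,
// scale == times_4 and disp == 8, the bytes emitted are
//
//   44    ModRM [01 000 100]  mod=01 (disp8), reg=rax, rm=SIB
//   91    SIB   [10 010 001]  ss=x4, index=rdx, base=rcx
//   08    disp8
//
// so a preceding 8B (movl r, a) opcode gives
// 8B 44 91 08 == movl rax, [rcx + rdx*4 + 8].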

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  if (UseAVX > 2) {
    int xreg_enc = reg->encoding();
    if (xreg_enc > 15) {
      XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
      emit_operand((Register)new_reg, base, index, scale, disp, rspec);
      return;
    }
  }
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
      // fall through
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
      // fall through
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x59: // mulpd
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
      // fall through
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
    // but they have the 0x0F prefix and are handled when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first bytes C4 and C5 alias onto the LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them, bits [7:6] are set in the VEX second byte since a
    // ModRM byte cannot be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits, the REX and vvvv bits are stored inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    int vex_opcode;
    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      vex_opcode = VEX_OPCODE_MASK & *ip;
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    } else {
      vex_opcode = VEX_OPCODE_0F;
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (vex_opcode) {
      case VEX_OPCODE_0F:
        switch (0xFF & *ip) {
        case 0x70: // pshufd r, r/a, #8
        case 0x71: // ps[rl|ra|ll]w r, #8
        case 0x72: // ps[rl|ra|ll]d r, #8
        case 0x73: // ps[rl|ra|ll]q r, #8
        case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
        case 0xC4: // pinsrw r, r, r/a, #8
        case 0xC5: // pextrw r/a, r, #8
        case 0xC6: // shufp[s|d] r, r, r/a, #8
          tail_size = 1;  // the imm8
          break;
        }
        break;
      case VEX_OPCODE_0F_3A:
        tail_size = 1;
        break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert((UseAVX > 0), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions, all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
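
// Decode example (illustrative): for the six bytes 8B 05 44 33 22 11
// (movl rax, [rip + 0x11223344] on 64-bit; an absolute [0x11223344] on
// 32-bit), locate_operand(inst, disp32_operand) returns inst + 2 (the
// address of the 44 byte) and locate_next_instruction(inst) returns
// inst + 6.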


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}
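
// Example (illustrative): the FPU stack index is folded into the second
// opcode byte:
//
//   emit_farith(0xDE, 0xC0, 1);   // DE C1   faddp st(1), st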


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}
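
// For reference (illustrative): the full byte sequences the addr_nop_*
// helpers emit, e.g. addr_nop_8 produces 0F 1F 84 00 00 00 00 00, one of
// the recommended multi-byte NOP forms for padding code without doing work.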

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
  }
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_64bit;
    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
  }
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_32bit;
  }
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDE);
  emit_int8(0xC0 | encode);
}

void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}

void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}

void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDC);
  emit_int8(0xC0 | encode);
}

void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}

void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rsp, dst, 4);
  emit_int32(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andnl(Register dst, Register src1, Address src2) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}

void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsil(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}

void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsmskl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}

void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsrl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
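
// Example (illustrative): a call emitted at address 0x1000 targeting 0x2000
// computes disp = 0x2000 - (0x1001 + 4) = 0xFFB and encodes as
// E8 FB 0F 00 00 (the disp32 is relative to the end of the instruction).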

void Assembler::cdql() {
  emit_int8((unsigned char)0x99);
}

void Assembler::cld() {
  emit_int8((unsigned char)0xFC);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rdi, dst, 1);
  emit_int8(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_int32(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

void Assembler::cmpl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_int8(0x66);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax;
// if they are equal, reg is stored at adr, otherwise the value at adr is
// loaded into rax. The ZF is set if the compared values were equal, and
// cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}

// The 8-bit cmpxchg compares the value at adr with the contents of rax;
// if they are equal, reg is stored at adr, otherwise the value at adr is
// loaded into rax. The ZF is set if the compared values were equal, and
// cleared otherwise.
void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg, true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB0);
  emit_operand(reg, adr);
}
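
// Typical use (illustrative): a compare-and-swap pairs cmpxchg with a lock
// prefix so it is atomic on MP systems, e.g. with the expected value in rax
// and the new value in reg:
//
//   lock();              // 0xF0 prefix
//   cmpxchgl(reg, adr);  // ZF set on success; rax reloaded on failure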

void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely, ucomisd comes out correct.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_64bit;
    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
  } else {
    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
  }
}

void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
  } else {
    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
  }
}

void Assembler::comiss(XMMRegister dst, Address src) {
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_32bit;
  }
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
}

void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
}

void Assembler::cpuid() {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA2);
}

1621 // Opcode / Instruction                  Op/En  64-Bit Mode  Compat/Leg Mode  Description                  Implemented
1622 // F2 0F 38 F0 /r        CRC32 r32, r/m8   RM   Valid        Valid            Accumulate CRC32 on r/m8.    v
1623 // F2 REX 0F 38 F0 /r    CRC32 r32, r/m8*  RM   Valid        N.E.             Accumulate CRC32 on r/m8.    -
1624 // F2 REX.W 0F 38 F0 /r  CRC32 r64, r/m8   RM   Valid        N.E.             Accumulate CRC32 on r/m8.    -
1625 //
1626 // F2 0F 38 F1 /r        CRC32 r32, r/m16  RM   Valid        Valid            Accumulate CRC32 on r/m16.   v
1627 //
1628 // F2 0F 38 F1 /r        CRC32 r32, r/m32  RM   Valid        Valid            Accumulate CRC32 on r/m32.   v
1629 //
1630 // F2 REX.W 0F 38 F1 /r  CRC32 r64, r/m64  RM   Valid        N.E.             Accumulate CRC32 on r/m64.   v
1631 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1632   assert(VM_Version::supports_sse4_2(), "");
1633   int8_t w = 0x01;
1634   Prefix p = Prefix_EMPTY;
1635 
1636   emit_int8((int8_t)0xF2);
1637   switch (sizeInBytes) {
1638   case 1:
1639     w = 0;
1640     break;
1641   case 2:
1642   case 4:
1643     break;
1644   LP64_ONLY(case 8:)
1645     // This instruction is not valid in 32-bit mode.
1646     // Note:
1647     // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
1648     //
1649     // Page B-72, Vol. 2C says
1650     //   qwreg2 to qwreg  1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
1651     //   mem64 to qwreg   1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r/m
1652     //   (note the 1111 0000 (F0) opcode byte above, which conflicts with)
1653     // page 3-208, Vol. 2A:
1654     //   F2 REX.W 0F 38 F1 /r  CRC32 r64, r/m64  RM  Valid  N.E.  Accumulate CRC32 on r/m64.
1655     //
1656     // Opcode byte F0 (low bit clear) is reserved for a different flavor of this instruction:
1657     //   F2 REX.W 0F 38 F0 /r  CRC32 r64, r/m8   RM  Valid  N.E.  Accumulate CRC32 on r/m8.
1658     p = REX_W;
1659     break;
1660   default:
1661     assert(0, "Unsupported value for a sizeInBytes argument");
1662     break;
1663   }
1664   LP64_ONLY(prefix(crc, v, p);)
1665   emit_int8((int8_t)0x0F);
1666   emit_int8(0x38);
1667   emit_int8((int8_t)(0xF0 | w));
1668   emit_int8((int8_t)(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 0x7)));
1669 }
1670 
1671 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1672   assert(VM_Version::supports_sse4_2(), "");
1673   InstructionMark im(this);
1674   int8_t w = 0x01;
1675   Prefix p = Prefix_EMPTY;
1676 
1677   emit_int8((int8_t)0xF2);
1678   switch (sizeInBytes) {
1679   case 1:
1680     w = 0;
1681     break;
1682   case 2:
1683   case 4:
1684     break;
1685   LP64_ONLY(case 8:)
1686     // This instruction is not valid in 32 bits
1687     p = REX_W;
1688     break;
1689   default:
1690     assert(0, "Unsupported value for a sizeInBytes argument");
1691     break;
1692   }
1693   LP64_ONLY(prefix(crc, adr, p);)
1694   emit_int8((int8_t)0x0F);
1695   emit_int8(0x38);
1696   emit_int8((int8_t)(0xF0 | w));
1697   emit_operand(crc, adr);
1698 }
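
// Usage sketch (comment only, not compiled): accumulating a CRC32 over a
// buffer one byte at a time with the address form above; "masm", rdx (crc),
// rsi (buffer) and rcx (count) are illustrative assumptions:
//
//   Label loop;
//   masm->bind(loop);
//   masm->crc32(rdx, Address(rsi, 0), 1);   // crc = crc32(crc, byte at [rsi])
//   masm->addl(rsi, 1);                     // advance the buffer pointer
//   masm->subl(rcx, 1);                     // decrement the remaining count
//   masm->jcc(Assembler::notZero, loop);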
1699 
1700 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1701   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1702   emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true);
1703 }
1704 
1705 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1706   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1707   emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ true);
1708 }
1709 
1710 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1711   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1712   if (VM_Version::supports_evex()) {
1713     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1714   } else {
1715     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1716   }
1717 }
1718 
1719 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1720   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1721   if (VM_Version::supports_evex()) {
1722     _tuple_type = EVEX_T1F;
1723     _input_size_in_bits = EVEX_64bit;
1724     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1725   } else {
1726     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1727   }
1728 }
1729 
1730 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1731   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1732   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VM_Version::supports_evex());
1733   emit_int8(0x2A);
1734   emit_int8((unsigned char)(0xC0 | encode));
1735 }
1736 
1737 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1738   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1739   if (VM_Version::supports_evex()) {
1740     _tuple_type = EVEX_T1S;
1741     _input_size_in_bits = EVEX_32bit;
1742     emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
1743   } else {
1744     emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
1745   }
1746 }
1747 
1748 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1749   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1750   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
1751   emit_int8(0x2A);
1752   emit_int8((unsigned char)(0xC0 | encode));
1753 }
1754 
1755 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1756   if (VM_Version::supports_evex()) {
1757     _tuple_type = EVEX_T1S;
1758     _input_size_in_bits = EVEX_32bit;
1759   }
1760   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1761   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
1762 }
1763 
1764 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1765   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1766   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
1767   emit_int8(0x2A);
1768   emit_int8((unsigned char)(0xC0 | encode));
1769 }
1770 
1771 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1772   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1773   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1774 }
1775 
1776 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1777   if (VM_Version::supports_evex()) {
1778     _tuple_type = EVEX_T1S;
1779     _input_size_in_bits = EVEX_32bit;
1780   }
1781   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1782   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1783 }
1784 
1785 
1786 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1787   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1788   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
1789   emit_int8(0x2C);
1790   emit_int8((unsigned char)(0xC0 | encode));
1791 }
1792 
1793 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1794   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1795   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
1796   emit_int8(0x2C);
1797   emit_int8((unsigned char)(0xC0 | encode));
1798 }
1799 
1800 void Assembler::decl(Address dst) {
1801   // Don't use it directly. Use MacroAssembler::decrement() instead.
1802   InstructionMark im(this);
1803   prefix(dst);
1804   emit_int8((unsigned char)0xFF);
1805   emit_operand(rcx, dst);
1806 }
1807 
1808 void Assembler::divsd(XMMRegister dst, Address src) {
1809   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1810   if (VM_Version::supports_evex()) {
1811     _tuple_type = EVEX_T1S;
1812     _input_size_in_bits = EVEX_64bit;
1813     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1814   } else {
1815     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1816   }
1817 }
1818 
1819 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1820   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1821   if (VM_Version::supports_evex()) {
1822     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1823   } else {
1824     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1825   }
1826 }
1827 
1828 void Assembler::divss(XMMRegister dst, Address src) {
1829   if (VM_Version::supports_evex()) {
1830     _tuple_type = EVEX_T1S;
1831     _input_size_in_bits = EVEX_32bit;
1832   }
1833   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1834   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1835 }
1836 
1837 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1838   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1839   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1840 }
1841 
1842 void Assembler::emms() {
1843   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1844   emit_int8(0x0F);
1845   emit_int8(0x77);
1846 }
1847 
1848 void Assembler::hlt() {
1849   emit_int8((unsigned char)0xF4);
1850 }
1851 
1852 void Assembler::idivl(Register src) {
1853   int encode = prefix_and_encode(src->encoding());
1854   emit_int8((unsigned char)0xF7);
1855   emit_int8((unsigned char)(0xF8 | encode));
1856 }
1857 
1858 void Assembler::divl(Register src) { // Unsigned
1859   int encode = prefix_and_encode(src->encoding());
1860   emit_int8((unsigned char)0xF7);
1861   emit_int8((unsigned char)(0xF0 | encode));
1862 }
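
// Usage sketch (comment only, not compiled): both idivl and divl consume the
// implicit rdx:rax dividend, leaving the quotient in rax and the remainder in
// rdx. The setup below is an illustrative assumption, not emitted by these
// methods:
//
//   masm->cdql();        // signed: sign-extend rax into rdx
//   masm->idivl(rcx);    // rax = quotient, rdx = remainder
//   // unsigned: zero rdx instead, i.e. masm->xorl(rdx, rdx); masm->divl(rcx);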
1863 
1864 void Assembler::imull(Register dst, Register src) {
1865   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1866   emit_int8(0x0F);
1867   emit_int8((unsigned char)0xAF);
1868   emit_int8((unsigned char)(0xC0 | encode));
1869 }
1870 
1871 
1872 void Assembler::imull(Register dst, Register src, int value) {
1873   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1874   if (is8bit(value)) {
1875     emit_int8(0x6B);
1876     emit_int8((unsigned char)(0xC0 | encode));
1877     emit_int8(value & 0xFF);
1878   } else {
1879     emit_int8(0x69);
1880     emit_int8((unsigned char)(0xC0 | encode));
1881     emit_int32(value);
1882   }
1883 }
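
// Usage sketch (comment only, not compiled): the immediate form above picks
// the short encoding automatically; the constants are illustrative:
//
//   masm->imull(rax, rbx, 10);      // fits in a signed byte -> opcode 0x6B
//   masm->imull(rax, rbx, 100000);  // needs 32 bits         -> opcode 0x69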
1884 
1885 void Assembler::imull(Register dst, Address src) {
1886   InstructionMark im(this);
1887   prefix(src, dst);
1888   emit_int8(0x0F);
1889   emit_int8((unsigned char) 0xAF);
1890   emit_operand(dst, src);
1891 }
1892 
1893 
1894 void Assembler::incl(Address dst) {
1895   // Don't use it directly. Use MacroAssembler::increment() instead.
1896   InstructionMark im(this);
1897   prefix(dst);
1898   emit_int8((unsigned char)0xFF);
1899   emit_operand(rax, dst);
1900 }
1901 
1902 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1903   InstructionMark im(this);
1904   assert((0 <= cc) && (cc < 16), "illegal cc");
1905   if (L.is_bound()) {
1906     address dst = target(L);
1907     assert(dst != NULL, "jcc most probably wrong");
1908 
1909     const int short_size = 2;
1910     const int long_size = 6;
1911     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
1912     if (maybe_short && is8bit(offs - short_size)) {
1913       // 0111 tttn #8-bit disp
1914       emit_int8(0x70 | cc);
1915       emit_int8((offs - short_size) & 0xFF);
1916     } else {
1917       // 0000 1111 1000 tttn #32-bit disp
1918       assert(is_simm32(offs - long_size),
1919              "must be 32bit offset (call4)");
1920       emit_int8(0x0F);
1921       emit_int8((unsigned char)(0x80 | cc));
1922       emit_int32(offs - long_size);
1923     }
1924   } else {
1925     // Note: we could eliminate conditional jumps to this jump if the
1926     //       condition is the same; however, that seems a rather unlikely case.
1927     // Note: use jccb() if label to be bound is very close to get
1928     //       an 8-bit displacement
1929     L.add_patch_at(code(), locator());
1930     emit_int8(0x0F);
1931     emit_int8((unsigned char)(0x80 | cc));
1932     emit_int32(0);
1933   }
1934 }
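
// Usage sketch (comment only, not compiled): how labels interact with jcc.
// A forward jump is emitted with a zero displacement and patched when the
// label is bound; the surrounding code is an illustrative assumption:
//
//   Label done;
//   masm->cmpl(rax, 0);
//   masm->jcc(Assembler::equal, done);  // unbound label: patched at bind()
//   masm->negl(rax);
//   masm->bind(done);                   // jumps emitted after this point see
//                                       // a bound label and a known offset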
1935 
1936 void Assembler::jccb(Condition cc, Label& L) {
1937   if (L.is_bound()) {
1938     const int short_size = 2;
1939     address entry = target(L);
1940 #ifdef ASSERT
1941     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1942     intptr_t delta = short_branch_delta();
1943     if (delta != 0) {
1944       dist += (dist < 0 ? (-delta) :delta);
1945     }
1946     assert(is8bit(dist), "Displacement too large for a short jmp");
1947 #endif
1948     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
1949     // 0111 tttn #8-bit disp
1950     emit_int8(0x70 | cc);
1951     emit_int8((offs - short_size) & 0xFF);
1952   } else {
1953     InstructionMark im(this);
1954     L.add_patch_at(code(), locator());
1955     emit_int8(0x70 | cc);
1956     emit_int8(0);
1957   }
1958 }
1959 
1960 void Assembler::jmp(Address adr) {
1961   InstructionMark im(this);
1962   prefix(adr);
1963   emit_int8((unsigned char)0xFF);
1964   emit_operand(rsp, adr);
1965 }
1966 
1967 void Assembler::jmp(Label& L, bool maybe_short) {
1968   if (L.is_bound()) {
1969     address entry = target(L);
1970     assert(entry != NULL, "jmp most probably wrong");
1971     InstructionMark im(this);
1972     const int short_size = 2;
1973     const int long_size = 5;
1974     intptr_t offs = entry - pc();
1975     if (maybe_short && is8bit(offs - short_size)) {
1976       emit_int8((unsigned char)0xEB);
1977       emit_int8((offs - short_size) & 0xFF);
1978     } else {
1979       emit_int8((unsigned char)0xE9);
1980       emit_int32(offs - long_size);
1981     }
1982   } else {
1983     // By default, forward jumps are always 32-bit displacements, since
1984     // we can't yet know where the label will be bound.  If you're sure that
1985     // the forward jump will not run beyond 256 bytes, use jmpb to
1986     // force an 8-bit displacement.
1987     InstructionMark im(this);
1988     L.add_patch_at(code(), locator());
1989     emit_int8((unsigned char)0xE9);
1990     emit_int32(0);
1991   }
1992 }
1993 
1994 void Assembler::jmp(Register entry) {
1995   int encode = prefix_and_encode(entry->encoding());
1996   emit_int8((unsigned char)0xFF);
1997   emit_int8((unsigned char)(0xE0 | encode));
1998 }
1999 
2000 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2001   InstructionMark im(this);
2002   emit_int8((unsigned char)0xE9);
2003   assert(dest != NULL, "must have a target");
2004   intptr_t disp = dest - (pc() + sizeof(int32_t));
2005   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2006   emit_data(disp, rspec.reloc(), call32_operand);
2007 }
2008 
2009 void Assembler::jmpb(Label& L) {
2010   if (L.is_bound()) {
2011     const int short_size = 2;
2012     address entry = target(L);
2013     assert(entry != NULL, "jmp most probably wrong");
2014 #ifdef ASSERT
2015     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2016     intptr_t delta = short_branch_delta();
2017     if (delta != 0) {
2018       dist += (dist < 0 ? (-delta) :delta);
2019     }
2020     assert(is8bit(dist), "Displacement too large for a short jmp");
2021 #endif
2022     intptr_t offs = entry - pc();
2023     emit_int8((unsigned char)0xEB);
2024     emit_int8((offs - short_size) & 0xFF);
2025   } else {
2026     InstructionMark im(this);
2027     L.add_patch_at(code(), locator());
2028     emit_int8((unsigned char)0xEB);
2029     emit_int8(0);
2030   }
2031 }
2032 
2033 void Assembler::ldmxcsr( Address src) {
2034   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2035   InstructionMark im(this);
2036   prefix(src);
2037   emit_int8(0x0F);
2038   emit_int8((unsigned char)0xAE);
2039   emit_operand(as_Register(2), src);
2040 }
2041 
2042 void Assembler::leal(Register dst, Address src) {
2043   InstructionMark im(this);
2044 #ifdef _LP64
2045   emit_int8(0x67); // addr32
2046   prefix(src, dst);
2047 #endif // _LP64
2048   emit_int8((unsigned char)0x8D);
2049   emit_operand(dst, src);
2050 }
2051 
2052 void Assembler::lfence() {
2053   emit_int8(0x0F);
2054   emit_int8((unsigned char)0xAE);
2055   emit_int8((unsigned char)0xE8);
2056 }
2057 
2058 void Assembler::lock() {
2059   emit_int8((unsigned char)0xF0);
2060 }
2061 
2062 void Assembler::lzcntl(Register dst, Register src) {
2063   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2064   emit_int8((unsigned char)0xF3);
2065   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2066   emit_int8(0x0F);
2067   emit_int8((unsigned char)0xBD);
2068   emit_int8((unsigned char)(0xC0 | encode));
2069 }
2070 
2071 // Emit mfence instruction
2072 void Assembler::mfence() {
2073   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2074   emit_int8(0x0F);
2075   emit_int8((unsigned char)0xAE);
2076   emit_int8((unsigned char)0xF0);
2077 }
2078 
2079 void Assembler::mov(Register dst, Register src) {
2080   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2081 }
2082 
2083 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2084   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2085   if (VM_Version::supports_avx512novl()) {
2086     int vector_len = AVX_512bit;
2087     int dst_enc = dst->encoding();
2088     int src_enc = src->encoding();
2089     int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F,
2090                                        /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
2091     emit_int8(0x28);
2092     emit_int8((unsigned char)(0xC0 | encode));
2093   } else if (VM_Version::supports_evex()) {
2094     emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66);
2095   } else {
2096     emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
2097   }
2098 }
2099 
2100 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2101   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2102   if (VM_Version::supports_avx512novl()) {
2103     int vector_len = AVX_512bit;
2104     int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, vector_len);
2105     emit_int8(0x28);
2106     emit_int8((unsigned char)(0xC0 | encode));
2107   } else {
2108     emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
2109   }
2110 }
2111 
2112 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2113   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2114   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
2115   emit_int8(0x16);
2116   emit_int8((unsigned char)(0xC0 | encode));
2117 }
2118 
2119 void Assembler::movb(Register dst, Address src) {
2120   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2121   InstructionMark im(this);
2122   prefix(src, dst, true);
2123   emit_int8((unsigned char)0x8A);
2124   emit_operand(dst, src);
2125 }
2126 
2127 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2128   _instruction_uses_vl = true;
2129   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2130   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false, VEX_OPCODE_0F,
2131                                       /* rex_w */ VM_Version::supports_evex(), AVX_128bit, /* legacy_mode */ false);
2132   emit_int8(0x12);
2133   emit_int8((unsigned char)(0xC0 | encode));
2135 }
2136 
2137 void Assembler::kmovql(KRegister dst, KRegister src) {
2138   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2139   int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE,
2140                                       /* no_mask_reg */ true, VEX_OPCODE_0F, /* rex_w */ true);
2141   emit_int8((unsigned char)0x90);
2142   emit_int8((unsigned char)(0xC0 | encode));
2143 }
2144 
2145 void Assembler::kmovql(KRegister dst, Address src) {
2146   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2147   int dst_enc = dst->encoding();
2148   int nds_enc = 0;
2149   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE,
2150              VEX_OPCODE_0F, /* vex_w */  true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
2151   emit_int8((unsigned char)0x90);
2152   emit_operand((Register)dst, src);
2153 }
2154 
2155 void Assembler::kmovql(Address dst, KRegister src) {
2156   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2157   int src_enc = src->encoding();
2158   int nds_enc = 0;
2159   vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE,
2160              VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
2161   emit_int8((unsigned char)0x90);
2162   emit_operand((Register)src, dst);
2163 }
2164 
2165 void Assembler::kmovql(KRegister dst, Register src) {
2166   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2167   VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
2168   int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true,
2169                                       VEX_OPCODE_0F, /* legacy_mode */ !_legacy_mode_bw);
2170   emit_int8((unsigned char)0x92);
2171   emit_int8((unsigned char)(0xC0 | encode));
2172 }
2173 
2174 void Assembler::kmovdl(KRegister dst, Register src) {
2175   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2176   VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
2177   int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true);
2178   emit_int8((unsigned char)0x92);
2179   emit_int8((unsigned char)(0xC0 | encode));
2180 }
2181 
2182 void Assembler::kmovwl(KRegister dst, Register src) {
2183   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2184   int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
2185   emit_int8((unsigned char)0x92);
2186   emit_int8((unsigned char)(0xC0 | encode));
2187 }
2188 
2189 void Assembler::movb(Address dst, int imm8) {
2190   InstructionMark im(this);
2191   prefix(dst);
2192   emit_int8((unsigned char)0xC6);
2193   emit_operand(rax, dst, 1);
2194   emit_int8(imm8);
2195 }
2196 
2197 
2198 void Assembler::movb(Address dst, Register src) {
2199   assert(src->has_byte_register(), "must have byte register");
2200   InstructionMark im(this);
2201   prefix(dst, src, true);
2202   emit_int8((unsigned char)0x88);
2203   emit_operand(src, dst);
2204 }
2205 
2206 void Assembler::movdl(XMMRegister dst, Register src) {
2207   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2208   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2209   emit_int8(0x6E);
2210   emit_int8((unsigned char)(0xC0 | encode));
2211 }
2212 
2213 void Assembler::movdl(Register dst, XMMRegister src) {
2214   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2215   // swap src/dst to get correct prefix
2216   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
2217   emit_int8(0x7E);
2218   emit_int8((unsigned char)(0xC0 | encode));
2219 }
2220 
2221 void Assembler::movdl(XMMRegister dst, Address src) {
2222   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2223   if (VM_Version::supports_evex()) {
2224     _tuple_type = EVEX_T1S;
2225     _input_size_in_bits = EVEX_32bit;
2226   }
2227   InstructionMark im(this);
2228   simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
2229   emit_int8(0x6E);
2230   emit_operand(dst, src);
2231 }
2232 
2233 void Assembler::movdl(Address dst, XMMRegister src) {
2234   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2235   if (VM_Version::supports_evex()) {
2236     _tuple_type = EVEX_T1S;
2237     _input_size_in_bits = EVEX_32bit;
2238   }
2239   InstructionMark im(this);
2240   simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
2241   emit_int8(0x7E);
2242   emit_operand(src, dst);
2243 }
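
// Usage sketch (comment only, not compiled): movdl moves raw 32-bit patterns
// between general and XMM registers without conversion, e.g. to inspect the
// bits of a float; the registers are illustrative assumptions:
//
//   masm->movdl(xmm0, rax);  // gpr bits -> low dword of xmm0
//   masm->movdl(rdx, xmm0);  // low dword of xmm0 -> gpr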
2244 
2245 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2246   _instruction_uses_vl = true;
2247   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2248   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2249 }
2250 
2251 void Assembler::movdqa(XMMRegister dst, Address src) {
2252   _instruction_uses_vl = true;
2253   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2254   if (VM_Version::supports_evex()) {
2255     _tuple_type = EVEX_FVM;
2256   }
2257   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2258 }
2259 
2260 void Assembler::movdqu(XMMRegister dst, Address src) {
2261   _instruction_uses_vl = true;
2262   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2263   if (VM_Version::supports_evex()) {
2264     _tuple_type = EVEX_FVM;
2265   }
2266   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2267 }
2268 
2269 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2270   _instruction_uses_vl = true;
2271   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2272   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2273 }
2274 
2275 void Assembler::movdqu(Address dst, XMMRegister src) {
2276   _instruction_uses_vl = true;
2277   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2278   if (VM_Version::supports_evex()) {
2279     _tuple_type = EVEX_FVM;
2280   }
2281   InstructionMark im(this);
2282   simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
2283   emit_int8(0x7F);
2284   emit_operand(src, dst);
2285 }
2286 
2287 // Move Unaligned 256-bit Vector
2288 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2289   _instruction_uses_vl = true;
2290   assert(UseAVX > 0, "");
2291   int vector_len = AVX_256bit;
2292   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2293   emit_int8(0x6F);
2294   emit_int8((unsigned char)(0xC0 | encode));
2295 }
2296 
2297 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2298   _instruction_uses_vl = true;
2299   assert(UseAVX > 0, "");
2300   if (VM_Version::supports_evex()) {
2301     _tuple_type = EVEX_FVM;
2302   }
2303   InstructionMark im(this);
2304   int vector_len = AVX_256bit;
2305   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2306   emit_int8(0x6F);
2307   emit_operand(dst, src);
2308 }
2309 
2310 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2311   _instruction_uses_vl = true;
2312   assert(UseAVX > 0, "");
2313   if (VM_Version::supports_evex()) {
2314     _tuple_type = EVEX_FVM;
2315   }
2316   InstructionMark im(this);
2317   int vector_len = AVX_256bit;
2318   // swap src<->dst for encoding
2319   assert(src != xnoreg, "sanity");
2320   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
2321   emit_int8(0x7F);
2322   emit_operand(src, dst);
2323 }
2324 
2325 // Move Unaligned EVEX-enabled Vector (element size programmable: 8, 16, 32, or 64 bits)
2326 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2327   _instruction_uses_vl = true;
2328   assert(UseAVX > 0, "");
2329   int src_enc = src->encoding();
2330   int dst_enc = dst->encoding();
2331   int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
2332                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
2333   emit_int8(0x6F);
2334   emit_int8((unsigned char)(0xC0 | encode));
2335 }
2336 
2337 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2338   _instruction_uses_vl = true;
2339   assert(UseAVX > 0, "");
2340   InstructionMark im(this);
2341   if (VM_Version::supports_evex()) {
2342     _tuple_type = EVEX_FVM;
2343   }
2344   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2345   emit_int8(0x6F);
2346   emit_operand(dst, src);
2347 }
2348 
2349 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2350   _instruction_uses_vl = true;
2351   assert(UseAVX > 0, "");
2352   InstructionMark im(this);
2353   assert(src != xnoreg, "sanity");
2354   if (VM_Version::supports_evex()) {
2355     _tuple_type = EVEX_FVM;
2356   }
2357   // swap src<->dst for encoding
2358   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
2359   emit_int8(0x7F);
2360   emit_operand(src, dst);
2361 }
2362 
2363 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2364   _instruction_uses_vl = true;
2365   assert(UseAVX > 0, "");
2366   int src_enc = src->encoding();
2367   int dst_enc = dst->encoding();
2368   int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
2369                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
2370   emit_int8(0x6F);
2371   emit_int8((unsigned char)(0xC0 | encode));
2372 }
2373 
2374 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2375   _instruction_uses_vl = true;
2376   assert(UseAVX > 2, "");
2377   InstructionMark im(this);
2378   _tuple_type = EVEX_FVM;
2379   vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2380   emit_int8(0x6F);
2381   emit_operand(dst, src);
2382 }
2383 
2384 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2385   _instruction_uses_vl = true;
2386   assert(UseAVX > 2, "");
2387   InstructionMark im(this);
2388   assert(src != xnoreg, "sanity");
2389   _tuple_type = EVEX_FVM;
2390   // swap src<->dst for encoding
2391   vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
2392   emit_int8(0x7F);
2393   emit_operand(src, dst);
2394 }
2395 
2396 // Uses zero extension on 64-bit
2397 
2398 void Assembler::movl(Register dst, int32_t imm32) {
2399   int encode = prefix_and_encode(dst->encoding());
2400   emit_int8((unsigned char)(0xB8 | encode));
2401   emit_int32(imm32);
2402 }
2403 
2404 void Assembler::movl(Register dst, Register src) {
2405   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2406   emit_int8((unsigned char)0x8B);
2407   emit_int8((unsigned char)(0xC0 | encode));
2408 }
2409 
2410 void Assembler::movl(Register dst, Address src) {
2411   InstructionMark im(this);
2412   prefix(src, dst);
2413   emit_int8((unsigned char)0x8B);
2414   emit_operand(dst, src);
2415 }
2416 
2417 void Assembler::movl(Address dst, int32_t imm32) {
2418   InstructionMark im(this);
2419   prefix(dst);
2420   emit_int8((unsigned char)0xC7);
2421   emit_operand(rax, dst, 4);
2422   emit_int32(imm32);
2423 }
2424 
2425 void Assembler::movl(Address dst, Register src) {
2426   InstructionMark im(this);
2427   prefix(dst, src);
2428   emit_int8((unsigned char)0x89);
2429   emit_operand(src, dst);
2430 }
2431 
2432 // Newer CPUs require the use of movsd and movss to avoid partial register stalls
2433 // when loading from memory. But for older Opterons, use movlpd instead of movsd.
2434 // The selection is done in MacroAssembler::movdbl() and movflt().
2435 void Assembler::movlpd(XMMRegister dst, Address src) {
2436   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2437   if (VM_Version::supports_evex()) {
2438     _tuple_type = EVEX_T1S;
2439     _input_size_in_bits = EVEX_32bit;
2440     emit_simd_arith_q(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2441   } else {
2442     emit_simd_arith(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2443   }
2444 }
2445 
2446 void Assembler::movq( MMXRegister dst, Address src ) {
2447   assert( VM_Version::supports_mmx(), "" );
2448   emit_int8(0x0F);
2449   emit_int8(0x6F);
2450   emit_operand(dst, src);
2451 }
2452 
2453 void Assembler::movq( Address dst, MMXRegister src ) {
2454   assert( VM_Version::supports_mmx(), "" );
2455   emit_int8(0x0F);
2456   emit_int8(0x7F);
2457   // Workaround for a gcc (3.2.1-7a) bug:
2458   // in that version of gcc, with only an emit_operand(MMX, Address)
2459   // available, gcc will tail-jump and try to reverse the parameters,
2460   // completely obliterating dst in the process. By having a version
2461   // available that doesn't need to swap the args at the tail jump,
2462   // the bug is avoided.
2463   emit_operand(dst, src);
2464 }
2465 
2466 void Assembler::movq(XMMRegister dst, Address src) {
2467   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2468   InstructionMark im(this);
2469   if (VM_Version::supports_evex()) {
2470     _tuple_type = EVEX_T1S;
2471     _input_size_in_bits = EVEX_64bit;
2472     simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2473   } else {
2474     simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2475   }
2476   emit_int8(0x7E);
2477   emit_operand(dst, src);
2478 }
2479 
2480 void Assembler::movq(Address dst, XMMRegister src) {
2481   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2482   InstructionMark im(this);
2483   if (VM_Version::supports_evex()) {
2484     _tuple_type = EVEX_T1S;
2485     _input_size_in_bits = EVEX_64bit;
2486     simd_prefix(src, xnoreg, dst, VEX_SIMD_66, /* no_mask_reg */ true,
2487                 VEX_OPCODE_0F, /* rex_w */ true);
2488   } else {
2489     simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2490   }
2491   emit_int8((unsigned char)0xD6);
2492   emit_operand(src, dst);
2493 }
2494 
2495 void Assembler::movsbl(Register dst, Address src) { // movsxb
2496   InstructionMark im(this);
2497   prefix(src, dst);
2498   emit_int8(0x0F);
2499   emit_int8((unsigned char)0xBE);
2500   emit_operand(dst, src);
2501 }
2502 
2503 void Assembler::movsbl(Register dst, Register src) { // movsxb
2504   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2505   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2506   emit_int8(0x0F);
2507   emit_int8((unsigned char)0xBE);
2508   emit_int8((unsigned char)(0xC0 | encode));
2509 }
2510 
2511 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2512   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2513   if (VM_Version::supports_evex()) {
2514     emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
2515   } else {
2516     emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
2517   }
2518 }
2519 
2520 void Assembler::movsd(XMMRegister dst, Address src) {
2521   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2522   if (VM_Version::supports_evex()) {
2523     _tuple_type = EVEX_T1S;
2524     _input_size_in_bits = EVEX_64bit;
2525     emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
2526   } else {
2527     emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
2528   }
2529 }
2530 
2531 void Assembler::movsd(Address dst, XMMRegister src) {
2532   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2533   InstructionMark im(this);
2534   if (VM_Version::supports_evex()) {
2535     _tuple_type = EVEX_T1S;
2536     _input_size_in_bits = EVEX_64bit;
2537     simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2);
2538   } else {
2539     simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, /* no_mask_reg */ false);
2540   }
2541   emit_int8(0x11);
2542   emit_operand(src, dst);
2543 }
2544 
2545 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2546   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2547   emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2548 }
2549 
2550 void Assembler::movss(XMMRegister dst, Address src) {
2551   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2552   if (VM_Version::supports_evex()) {
2553     _tuple_type = EVEX_T1S;
2554     _input_size_in_bits = EVEX_32bit;
2555   }
2556   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2557 }
2558 
2559 void Assembler::movss(Address dst, XMMRegister src) {
2560   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2561   if (VM_Version::supports_evex()) {
2562     _tuple_type = EVEX_T1S;
2563     _input_size_in_bits = EVEX_32bit;
2564   }
2565   InstructionMark im(this);
2566   simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
2567   emit_int8(0x11);
2568   emit_operand(src, dst);
2569 }
2570 
2571 void Assembler::movswl(Register dst, Address src) { // movsxw
2572   InstructionMark im(this);
2573   prefix(src, dst);
2574   emit_int8(0x0F);
2575   emit_int8((unsigned char)0xBF);
2576   emit_operand(dst, src);
2577 }
2578 
2579 void Assembler::movswl(Register dst, Register src) { // movsxw
2580   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2581   emit_int8(0x0F);
2582   emit_int8((unsigned char)0xBF);
2583   emit_int8((unsigned char)(0xC0 | encode));
2584 }
2585 
2586 void Assembler::movw(Address dst, int imm16) {
2587   InstructionMark im(this);
2588 
2589   emit_int8(0x66); // switch to 16-bit mode
2590   prefix(dst);
2591   emit_int8((unsigned char)0xC7);
2592   emit_operand(rax, dst, 2);
2593   emit_int16(imm16);
2594 }
2595 
2596 void Assembler::movw(Register dst, Address src) {
2597   InstructionMark im(this);
2598   emit_int8(0x66);
2599   prefix(src, dst);
2600   emit_int8((unsigned char)0x8B);
2601   emit_operand(dst, src);
2602 }
2603 
2604 void Assembler::movw(Address dst, Register src) {
2605   InstructionMark im(this);
2606   emit_int8(0x66);
2607   prefix(dst, src);
2608   emit_int8((unsigned char)0x89);
2609   emit_operand(src, dst);
2610 }
2611 
2612 void Assembler::movzbl(Register dst, Address src) { // movzxb
2613   InstructionMark im(this);
2614   prefix(src, dst);
2615   emit_int8(0x0F);
2616   emit_int8((unsigned char)0xB6);
2617   emit_operand(dst, src);
2618 }
2619 
2620 void Assembler::movzbl(Register dst, Register src) { // movzxb
2621   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2622   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2623   emit_int8(0x0F);
2624   emit_int8((unsigned char)0xB6);
2625   emit_int8((unsigned char)(0xC0 | encode));
2626 }
2627 
2628 void Assembler::movzwl(Register dst, Address src) { // movzxw
2629   InstructionMark im(this);
2630   prefix(src, dst);
2631   emit_int8(0x0F);
2632   emit_int8((unsigned char)0xB7);
2633   emit_operand(dst, src);
2634 }
2635 
2636 void Assembler::movzwl(Register dst, Register src) { // movzxw
2637   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2638   emit_int8(0x0F);
2639   emit_int8((unsigned char)0xB7);
2640   emit_int8((unsigned char)(0xC0 | encode));
2641 }
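
// Usage sketch (comment only, not compiled): the movs*/movz* family differs
// only in how the upper bits of dst are filled; operands are illustrative
// assumptions:
//
//   masm->movsbl(rax, rbx);              // sign-extend the low byte of rbx
//   masm->movzbl(rax, rbx);              // zero-extend the low byte of rbx
//   masm->movzwl(rax, Address(rsi, 0));  // zero-extend a 16-bit load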
2642 
2643 void Assembler::mull(Address src) {
2644   InstructionMark im(this);
2645   prefix(src);
2646   emit_int8((unsigned char)0xF7);
2647   emit_operand(rsp, src);
2648 }
2649 
2650 void Assembler::mull(Register src) {
2651   int encode = prefix_and_encode(src->encoding());
2652   emit_int8((unsigned char)0xF7);
2653   emit_int8((unsigned char)(0xE0 | encode));
2654 }
2655 
2656 void Assembler::mulsd(XMMRegister dst, Address src) {
2657   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2658   if (VM_Version::supports_evex()) {
2659     _tuple_type = EVEX_T1S;
2660     _input_size_in_bits = EVEX_64bit;
2661     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2662   } else {
2663     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2664   }
2665 }
2666 
2667 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2668   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2669   if (VM_Version::supports_evex()) {
2670     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2671   } else {
2672     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2673   }
2674 }
2675 
2676 void Assembler::mulss(XMMRegister dst, Address src) {
2677   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2678   if (VM_Version::supports_evex()) {
2679     _tuple_type = EVEX_T1S;
2680     _input_size_in_bits = EVEX_32bit;
2681   }
2682   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2683 }
2684 
2685 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2686   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2687   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2688 }
2689 
2690 void Assembler::negl(Register dst) {
2691   int encode = prefix_and_encode(dst->encoding());
2692   emit_int8((unsigned char)0xF7);
2693   emit_int8((unsigned char)(0xD8 | encode));
2694 }
2695 
2696 void Assembler::nop(int i) {
2697 #ifdef ASSERT
2698   assert(i > 0, " ");
2699   // The fancy nops aren't currently recognized by debuggers, making it a
2700   // pain to disassemble code while debugging. If asserts are on, speed is
2701   // clearly not an issue, so simply use the traditional single-byte nop
2702   // to do alignment.
2703 
2704   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2705   return;
2706 
2707 #endif // ASSERT
2708 
2709   if (UseAddressNop && VM_Version::is_intel()) {
2710     //
2711     // Using multi-byte nops "0x0F 0x1F [address]" for Intel
2712     //  1: 0x90
2713     //  2: 0x66 0x90
2714     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2715     //  4: 0x0F 0x1F 0x40 0x00
2716     //  5: 0x0F 0x1F 0x44 0x00 0x00
2717     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2718     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2719     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2720     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2721     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2722     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2723 
2724     // The rest of the encoding is Intel-specific - don't use consecutive address nops
2725 
2726     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2727     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2728     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2729     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2730 
2731     while(i >= 15) {
2732       // For Intel don't generate consecutive address nops (mix with regular nops)
2733       i -= 15;
2734       emit_int8(0x66);   // size prefix
2735       emit_int8(0x66);   // size prefix
2736       emit_int8(0x66);   // size prefix
2737       addr_nop_8();
2738       emit_int8(0x66);   // size prefix
2739       emit_int8(0x66);   // size prefix
2740       emit_int8(0x66);   // size prefix
2741       emit_int8((unsigned char)0x90);
2742                          // nop
2743     }
2744     switch (i) {
2745       case 14:
2746         emit_int8(0x66); // size prefix
2747       case 13:
2748         emit_int8(0x66); // size prefix
2749       case 12:
2750         addr_nop_8();
2751         emit_int8(0x66); // size prefix
2752         emit_int8(0x66); // size prefix
2753         emit_int8(0x66); // size prefix
2754         emit_int8((unsigned char)0x90);
2755                          // nop
2756         break;
2757       case 11:
2758         emit_int8(0x66); // size prefix
2759       case 10:
2760         emit_int8(0x66); // size prefix
2761       case 9:
2762         emit_int8(0x66); // size prefix
2763       case 8:
2764         addr_nop_8();
2765         break;
2766       case 7:
2767         addr_nop_7();
2768         break;
2769       case 6:
2770         emit_int8(0x66); // size prefix
2771       case 5:
2772         addr_nop_5();
2773         break;
2774       case 4:
2775         addr_nop_4();
2776         break;
2777       case 3:
2778         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2779         emit_int8(0x66); // size prefix
2780       case 2:
2781         emit_int8(0x66); // size prefix
2782       case 1:
2783         emit_int8((unsigned char)0x90);
2784                          // nop
2785         break;
2786       default:
2787         assert(i == 0, " ");
2788     }
2789     return;
2790   }
2791   if (UseAddressNop && VM_Version::is_amd()) {
2792     //
2793     // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
2794     //  1: 0x90
2795     //  2: 0x66 0x90
2796     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2797     //  4: 0x0F 0x1F 0x40 0x00
2798     //  5: 0x0F 0x1F 0x44 0x00 0x00
2799     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2800     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2801     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2802     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2803     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2804     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2805 
2806     // The rest of the encoding is AMD-specific - use consecutive address nops
2807 
2808     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2809     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2810     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2811     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2812     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2813     //     Size prefixes (0x66) are added for larger sizes
2814 
2815     while(i >= 22) {
2816       i -= 11;
2817       emit_int8(0x66); // size prefix
2818       emit_int8(0x66); // size prefix
2819       emit_int8(0x66); // size prefix
2820       addr_nop_8();
2821     }
2822     // Generate the first nop for sizes between 21 and 12
2823     switch (i) {
2824       case 21:
2825         i -= 1;
2826         emit_int8(0x66); // size prefix
2827       case 20:
2828       case 19:
2829         i -= 1;
2830         emit_int8(0x66); // size prefix
2831       case 18:
2832       case 17:
2833         i -= 1;
2834         emit_int8(0x66); // size prefix
2835       case 16:
2836       case 15:
2837         i -= 8;
2838         addr_nop_8();
2839         break;
2840       case 14:
2841       case 13:
2842         i -= 7;
2843         addr_nop_7();
2844         break;
2845       case 12:
2846         i -= 6;
2847         emit_int8(0x66); // size prefix
2848         addr_nop_5();
2849         break;
2850       default:
2851         assert(i < 12, " ");
2852     }
2853 
2854     // Generate the second nop for sizes between 11 and 1
2855     switch (i) {
2856       case 11:
2857         emit_int8(0x66); // size prefix
2858       case 10:
2859         emit_int8(0x66); // size prefix
2860       case 9:
2861         emit_int8(0x66); // size prefix
2862       case 8:
2863         addr_nop_8();
2864         break;
2865       case 7:
2866         addr_nop_7();
2867         break;
2868       case 6:
2869         emit_int8(0x66); // size prefix
2870       case 5:
2871         addr_nop_5();
2872         break;
2873       case 4:
2874         addr_nop_4();
2875         break;
2876       case 3:
2877         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2878         emit_int8(0x66); // size prefix
2879       case 2:
2880         emit_int8(0x66); // size prefix
2881       case 1:
2882         emit_int8((unsigned char)0x90);
2883                          // nop
2884         break;
2885       default:
2886         assert(i == 0, " ");
2887     }
2888     return;
2889   }
2890 
2891   // Using nops with size prefixes "0x66 0x90".
2892   // From AMD Optimization Guide:
2893   //  1: 0x90
2894   //  2: 0x66 0x90
2895   //  3: 0x66 0x66 0x90
2896   //  4: 0x66 0x66 0x66 0x90
2897   //  5: 0x66 0x66 0x90 0x66 0x90
2898   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
2899   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2900   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2901   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2902   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2903   //
2904   while(i > 12) {
2905     i -= 4;
2906     emit_int8(0x66); // size prefix
2907     emit_int8(0x66);
2908     emit_int8(0x66);
2909     emit_int8((unsigned char)0x90);
2910                      // nop
2911   }
2912   // 1 - 12 nops
2913   if(i > 8) {
2914     if(i > 9) {
2915       i -= 1;
2916       emit_int8(0x66);
2917     }
2918     i -= 3;
2919     emit_int8(0x66);
2920     emit_int8(0x66);
2921     emit_int8((unsigned char)0x90);
2922   }
2923   // 1 - 8 nops
2924   if(i > 4) {
2925     if(i > 6) {
2926       i -= 1;
2927       emit_int8(0x66);
2928     }
2929     i -= 3;
2930     emit_int8(0x66);
2931     emit_int8(0x66);
2932     emit_int8((unsigned char)0x90);
2933   }
2934   switch (i) {
2935     case 4:
2936       emit_int8(0x66);
2937     case 3:
2938       emit_int8(0x66);
2939     case 2:
2940       emit_int8(0x66);
2941     case 1:
2942       emit_int8((unsigned char)0x90);
2943       break;
2944     default:
2945       assert(i == 0, " ");
2946   }
2947 }
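
// Usage sketch (comment only, not compiled): nop(i) is typically driven by
// alignment, e.g. padding the instruction stream to a 16-byte boundary; the
// modulus and the use of offset() here are illustrative assumptions:
//
//   int pad = (16 - (masm->offset() & 15)) & 15;
//   if (pad > 0) {
//     masm->nop(pad);  // one call; the encodings above keep the padding dense
//   }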
2948 
2949 void Assembler::notl(Register dst) {
2950   int encode = prefix_and_encode(dst->encoding());
2951   emit_int8((unsigned char)0xF7);
2952   emit_int8((unsigned char)(0xD0 | encode));
2953 }
2954 
2955 void Assembler::orl(Address dst, int32_t imm32) {
2956   InstructionMark im(this);
2957   prefix(dst);
2958   emit_arith_operand(0x81, rcx, dst, imm32);
2959 }
2960 
2961 void Assembler::orl(Register dst, int32_t imm32) {
2962   prefix(dst);
2963   emit_arith(0x81, 0xC8, dst, imm32);
2964 }
2965 
2966 void Assembler::orl(Register dst, Address src) {
2967   InstructionMark im(this);
2968   prefix(src, dst);
2969   emit_int8(0x0B);
2970   emit_operand(dst, src);
2971 }
2972 
2973 void Assembler::orl(Register dst, Register src) {
2974   (void) prefix_and_encode(dst->encoding(), src->encoding());
2975   emit_arith(0x0B, 0xC0, dst, src);
2976 }
2977 
2978 void Assembler::orl(Address dst, Register src) {
2979   InstructionMark im(this);
2980   prefix(dst, src);
2981   emit_int8(0x09);
2982   emit_operand(src, dst);
2983 }
2984 
2985 void Assembler::packuswb(XMMRegister dst, Address src) {
2986   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2987   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2988   if (VM_Version::supports_evex()) {
2989     _tuple_type = EVEX_FV;
2990     _input_size_in_bits = EVEX_32bit;
2991   }
2992   emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
2993 }
2994 
2995 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2996   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2997   emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
2998 }
2999 
3000 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3001   assert(UseAVX > 0, "some form of AVX must be enabled");
3002   emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
3003 }
3004 
3005 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3006   _instruction_uses_vl = true;
3007   assert(VM_Version::supports_avx2(), "");
3008   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3009                                       VEX_OPCODE_0F_3A, /* rex_w */ true, vector_len);
3010   emit_int8(0x00);
3011   emit_int8(0xC0 | encode);
3012   emit_int8(imm8);
3013 }
3014 
3015 void Assembler::pause() {
3016   emit_int8((unsigned char)0xF3);
3017   emit_int8((unsigned char)0x90);
3018 }
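
// Usage sketch (comment only, not compiled): pause is intended for spin-wait
// loops, hinting the core to back off; the flag layout is an illustrative
// assumption:
//
//   Label spin;
//   masm->bind(spin);
//   masm->pause();                    // reduce spin-loop power and contention
//   masm->cmpl(Address(rsi, 0), 0);   // test the flag being waited on
//   masm->jcc(Assembler::equal, spin);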
3019 
3020 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3021   assert(VM_Version::supports_sse4_2(), "");
3022   InstructionMark im(this);
3023   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_3A,
3024               /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3025   emit_int8(0x61);
3026   emit_operand(dst, src);
3027   emit_int8(imm8);
3028 }
3029 
3030 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3031   assert(VM_Version::supports_sse4_2(), "");
3032   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3033                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3034   emit_int8(0x61);
3035   emit_int8((unsigned char)(0xC0 | encode));
3036   emit_int8(imm8);
3037 }
3038 
3039 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
3040   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3041   emit_simd_arith(0x75, dst, src, VEX_SIMD_66,
3042                   /* no_mask_reg */ false, /* legacy_mode */ (VM_Version::supports_avx512dq() == false));
3043 }
3044 
3045 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3046   assert(UseAVX > 0, "some form of AVX must be enabled");
3047   emit_vex_arith(0x75, dst, nds, src, VEX_SIMD_66, vector_len,
3048                  /* no_mask_reg */ false, /* legacy_mode */ (VM_Version::supports_avx512dq() == false));
3049 }
3050 
3051 void Assembler::pmovmskb(Register dst, XMMRegister src) {
3052   assert(VM_Version::supports_sse2(), "");
3053   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true, VEX_OPCODE_0F,
3054                                       /* rex_w */ false, AVX_128bit, /* legacy_mode */ (VM_Version::supports_avx512dq() == false));
3055   emit_int8((unsigned char)0xD7);
3056   emit_int8((unsigned char)(0xC0 | encode));
3057 }
3058 
3059 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
3060   assert(VM_Version::supports_avx2(), "");
3061   int vector_len = AVX_256bit;
3062   int encode = vex_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66,
3063                                      vector_len, VEX_OPCODE_0F, true, false);
3064   emit_int8((unsigned char)0xD7);
3065   emit_int8((unsigned char)(0xC0 | encode));
3066 }
3067 
3068 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3069   assert(VM_Version::supports_sse4_1(), "");
3070   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
3071                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3072   emit_int8(0x16);
3073   emit_int8((unsigned char)(0xC0 | encode));
3074   emit_int8(imm8);
3075 }
3076 
3077 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
3078   assert(VM_Version::supports_sse4_1(), "");
3079   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */  true,
3080                                       VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3081   emit_int8(0x16);
3082   emit_int8((unsigned char)(0xC0 | encode));
3083   emit_int8(imm8);
3084 }
3085 
3086 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
3087   assert(VM_Version::supports_sse2(), "");
3088   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
3089                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3090   emit_int8((unsigned char)0xC5);
3091   emit_int8((unsigned char)(0xC0 | encode));
3092   emit_int8(imm8);
3093 }
3094 
3095 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
3096   assert(VM_Version::supports_sse4_1(), "");
3097   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
3098                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3099   emit_int8(0x22);
3100   emit_int8((unsigned char)(0xC0 | encode));
3101   emit_int8(imm8);
3102 }
3103 
3104 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
3105   assert(VM_Version::supports_sse4_1(), "");
3106   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
3107                                       VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3108   emit_int8(0x22);
3109   emit_int8((unsigned char)(0xC0 | encode));
3110   emit_int8(imm8);
3111 }
3112 
3113 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
3114   assert(VM_Version::supports_sse2(), "");
3115   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
3116                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3117   emit_int8((unsigned char)0xC4);
3118   emit_int8((unsigned char)(0xC0 | encode));
3119   emit_int8(imm8);
3120 }
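
// Usage sketch (comment only, not compiled): pinsr*/pextr* give scalar access
// to individual vector lanes; lane indices and registers are illustrative
// assumptions:
//
//   masm->pinsrd(xmm1, rax, 2);  // write rax into 32-bit lane 2 of xmm1
//   masm->pextrd(rbx, xmm1, 2);  // read 32-bit lane 2 of xmm1 back into rbx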
3121 
3122 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
3123   assert(VM_Version::supports_sse4_1(), "");
3124   if (VM_Version::supports_evex()) {
3125     _tuple_type = EVEX_HVM;
3126   }
3127   InstructionMark im(this);
3128   simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
3129   emit_int8(0x30);
3130   emit_operand(dst, src);
3131 }
3132 
3133 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
3134   assert(VM_Version::supports_sse4_1(), "");
3135   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
3136   emit_int8(0x30);
3137   emit_int8((unsigned char)(0xC0 | encode));
3138 }
3139 
3140 void Assembler::vpmovzxbw(XMMRegister dst, Address src) {
3141   assert(VM_Version::supports_avx(), "");
3142   InstructionMark im(this);
3143   bool vector256 = true;
3144   assert(dst != xnoreg, "sanity");
3145   int dst_enc = dst->encoding();
3146   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
3147   emit_int8(0x30);
3148   emit_operand(dst, src);
3149 }
3150 
3151 // generic
3152 void Assembler::pop(Register dst) {
3153   int encode = prefix_and_encode(dst->encoding());
3154   emit_int8(0x58 | encode);
3155 }
3156 
3157 void Assembler::popcntl(Register dst, Address src) {
3158   assert(VM_Version::supports_popcnt(), "must support");
3159   InstructionMark im(this);
3160   emit_int8((unsigned char)0xF3);
3161   prefix(src, dst);
3162   emit_int8(0x0F);
3163   emit_int8((unsigned char)0xB8);
3164   emit_operand(dst, src);
3165 }
3166 
3167 void Assembler::popcntl(Register dst, Register src) {
3168   assert(VM_Version::supports_popcnt(), "must support");
3169   emit_int8((unsigned char)0xF3);
3170   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3171   emit_int8(0x0F);
3172   emit_int8((unsigned char)0xB8);
3173   emit_int8((unsigned char)(0xC0 | encode));
3174 }
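
// POPCNT (F3 0F B8 /r) leaves the number of set bits of src in dst, e.g.
//   __ popcntl(rax, rbx);  // rax = number of 1-bits in ebx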
3175 
3176 void Assembler::popf() {
3177   emit_int8((unsigned char)0x9D);
3178 }
3179 
3180 #ifndef _LP64 // no 32bit push/pop on amd64
3181 void Assembler::popl(Address dst) {
  // NOTE: 32-bit only; in 64-bit mode this encoding would adjust the stack by 8 bytes
3183   InstructionMark im(this);
3184   prefix(dst);
3185   emit_int8((unsigned char)0x8F);
3186   emit_operand(rax, dst);
3187 }
3188 #endif
3189 
3190 void Assembler::prefetch_prefix(Address src) {
3191   prefix(src);
3192   emit_int8(0x0F);
3193 }
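
// The prefetch hints below share two opcodes; the ModRM reg field (the
// register passed to emit_operand) selects the variant:
//   0F 18: /0 = prefetchnta, /1 = prefetcht0, /2 = prefetcht1, /3 = prefetcht2
//   0F 0D: /0 = prefetch (3DNow!, emitted by prefetchr), /1 = prefetchw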
3194 
3195 void Assembler::prefetchnta(Address src) {
3196   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3197   InstructionMark im(this);
3198   prefetch_prefix(src);
3199   emit_int8(0x18);
3200   emit_operand(rax, src); // 0, src
3201 }
3202 
3203 void Assembler::prefetchr(Address src) {
3204   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3205   InstructionMark im(this);
3206   prefetch_prefix(src);
3207   emit_int8(0x0D);
3208   emit_operand(rax, src); // 0, src
3209 }
3210 
3211 void Assembler::prefetcht0(Address src) {
3212   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3213   InstructionMark im(this);
3214   prefetch_prefix(src);
3215   emit_int8(0x18);
3216   emit_operand(rcx, src); // 1, src
3217 }
3218 
3219 void Assembler::prefetcht1(Address src) {
3220   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3221   InstructionMark im(this);
3222   prefetch_prefix(src);
3223   emit_int8(0x18);
3224   emit_operand(rdx, src); // 2, src
3225 }
3226 
3227 void Assembler::prefetcht2(Address src) {
3228   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3229   InstructionMark im(this);
3230   prefetch_prefix(src);
3231   emit_int8(0x18);
3232   emit_operand(rbx, src); // 3, src
3233 }
3234 
3235 void Assembler::prefetchw(Address src) {
3236   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3237   InstructionMark im(this);
3238   prefetch_prefix(src);
3239   emit_int8(0x0D);
3240   emit_operand(rcx, src); // 1, src
3241 }
3242 
3243 void Assembler::prefix(Prefix p) {
3244   emit_int8(p);
3245 }
3246 
3247 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
3248   assert(VM_Version::supports_ssse3(), "");
3249   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
3250                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3251   emit_int8(0x00);
3252   emit_int8((unsigned char)(0xC0 | encode));
3253 }
3254 
3255 void Assembler::pshufb(XMMRegister dst, Address src) {
3256   assert(VM_Version::supports_ssse3(), "");
3257   if (VM_Version::supports_evex()) {
3258     _tuple_type = EVEX_FVM;
3259   }
3260   InstructionMark im(this);
3261   simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
3262               VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3263   emit_int8(0x00);
3264   emit_operand(dst, src);
3265 }
3266 
3267 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
3268   _instruction_uses_vl = true;
3269   assert(isByte(mode), "invalid value");
3270   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3271   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
3272   emit_int8(mode & 0xFF);
3273 }
3274 
3275 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
3276   _instruction_uses_vl = true;
3277   assert(isByte(mode), "invalid value");
3278   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3279   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3280   if (VM_Version::supports_evex()) {
3281     _tuple_type = EVEX_FV;
3282     _input_size_in_bits = EVEX_32bit;
3283   }
3284   InstructionMark im(this);
3285   simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false);
3286   emit_int8(0x70);
3287   emit_operand(dst, src);
3288   emit_int8(mode & 0xFF);
3289 }
3290 
3291 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
3292   assert(isByte(mode), "invalid value");
3293   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3294   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
3295   emit_int8(mode & 0xFF);
3296 }
3297 
3298 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
3299   assert(isByte(mode), "invalid value");
3300   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3301   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3302   if (VM_Version::supports_evex()) {
3303     _tuple_type = EVEX_FVM;
3304   }
3305   InstructionMark im(this);
3306   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false,
3307               VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3308   emit_int8(0x70);
3309   emit_operand(dst, src);
3310   emit_int8(mode & 0xFF);
3311 }
3312 
void Assembler::psrldq(XMMRegister dst, int shift) {
  // Shift right 128 bit value in dst XMMRegister by shift number of bytes.
3315   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3316   // XMM3 is for /3 encoding: 66 0F 73 /3 ib
3317   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
3318                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3319   emit_int8(0x73);
3320   emit_int8((unsigned char)(0xC0 | encode));
3321   emit_int8(shift);
3322 }
3323 
3324 void Assembler::pslldq(XMMRegister dst, int shift) {
3325   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
3326   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3327   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
3328   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
3329                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3330   emit_int8(0x73);
3331   emit_int8((unsigned char)(0xC0 | encode));
3332   emit_int8(shift);
3333 }
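
// Illustrative use: psrldq(xmm0, 8) moves the high quadword of xmm0 into
// the low quadword and zero-fills the high half; pslldq(xmm0, 8) is the
// mirror image. Both shift by whole bytes, never by bits.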
3334 
3335 void Assembler::ptest(XMMRegister dst, Address src) {
3336   assert(VM_Version::supports_sse4_1(), "");
3337   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3338   InstructionMark im(this);
3339   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3340               VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3341   emit_int8(0x17);
3342   emit_operand(dst, src);
3343 }
3344 
3345 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
3346   assert(VM_Version::supports_sse4_1(), "");
3347   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3348                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3349   emit_int8(0x17);
3350   emit_int8((unsigned char)(0xC0 | encode));
3351 }
3352 
3353 void Assembler::vptest(XMMRegister dst, Address src) {
3354   assert(VM_Version::supports_avx(), "");
3355   InstructionMark im(this);
3356   int vector_len = AVX_256bit;
3357   assert(dst != xnoreg, "sanity");
3358   int dst_enc = dst->encoding();
3359   // swap src<->dst for encoding
3360   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* rex_w */ false,
3361              vector_len, /* legacy_mode  */ true, /* no_mask_reg */ false);
3362   emit_int8(0x17);
3363   emit_operand(dst, src);
3364 }
3365 
3366 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
3367   assert(VM_Version::supports_avx(), "");
3368   int vector_len = AVX_256bit;
3369   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
3370   emit_int8(0x17);
3371   emit_int8((unsigned char)(0xC0 | encode));
3372 }
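
// Common idiom: ptest(vec, vec) sets ZF exactly when vec is all zeroes, so
//   __ ptest(xmm0, xmm0);
//   __ jcc(Assembler::zero, L_all_zero);
// branches on an all-zero vector without moving it through a GPR.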
3373 
3374 void Assembler::punpcklbw(XMMRegister dst, Address src) {
3375   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3376   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3377   if (VM_Version::supports_evex()) {
3378     _tuple_type = EVEX_FVM;
3379   }
3380   emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
3381 }
3382 
3383 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
3384   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3385   emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
3386 }
3387 
3388 void Assembler::punpckldq(XMMRegister dst, Address src) {
3389   _instruction_uses_vl = true;
3390   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3391   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3392   if (VM_Version::supports_evex()) {
3393     _tuple_type = EVEX_FV;
3394     _input_size_in_bits = EVEX_32bit;
3395   }
3396   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3397 }
3398 
3399 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
3400   _instruction_uses_vl = true;
3401   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3402   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3403 }
3404 
3405 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
3406   _instruction_uses_vl = true;
3407   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3408   if (VM_Version::supports_evex()) {
3409     emit_simd_arith_q(0x6C, dst, src, VEX_SIMD_66);
3410   } else {
3411     emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
3412   }
3413 }
3414 
3415 void Assembler::push(int32_t imm32) {
  // in 64-bit mode this pushes 8 bytes onto the stack, but the
  // instruction still takes only a 32-bit immediate
3418   emit_int8(0x68);
3419   emit_int32(imm32);
3420 }
3421 
void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8(0x50 | encode);
}
3427 
3428 void Assembler::pushf() {
3429   emit_int8((unsigned char)0x9C);
3430 }
3431 
3432 #ifndef _LP64 // no 32bit push/pop on amd64
3433 void Assembler::pushl(Address src) {
  // NOTE: 32-bit only; in 64-bit mode this encoding would push 8 bytes
3435   InstructionMark im(this);
3436   prefix(src);
3437   emit_int8((unsigned char)0xFF);
3438   emit_operand(rsi, src);
3439 }
3440 #endif
3441 
3442 void Assembler::rcll(Register dst, int imm8) {
3443   assert(isShiftCount(imm8), "illegal shift count");
3444   int encode = prefix_and_encode(dst->encoding());
3445   if (imm8 == 1) {
3446     emit_int8((unsigned char)0xD1);
3447     emit_int8((unsigned char)(0xD0 | encode));
3448   } else {
3449     emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD0 | encode));
3451     emit_int8(imm8);
3452   }
3453 }
3454 
3455 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
3456   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3457   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3458   emit_int8(0x53);
  emit_int8((unsigned char)(0xC0 | encode));
3460 }
3461 
3462 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
3463   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3464   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3465   emit_int8(0x53);
  emit_int8((unsigned char)(0xC0 | encode));
3467 }
3468 
3469 void Assembler::rdtsc() {
3470   emit_int8((unsigned char)0x0F);
3471   emit_int8((unsigned char)0x31);
3472 }
3473 
// copies rcx pointer-sized words from [esi] to [edi]
// generic
void Assembler::rep_mov() {
  emit_int8((unsigned char)0xF3); // REP
  LP64_ONLY(prefix(REX_W));       // LP64:MOVSQ, LP32:MOVSD
  emit_int8((unsigned char)0xA5);
}
3482 
// fills rcx bytes at [edi] with the byte in al (low byte of rax)
void Assembler::rep_stosb() {
  emit_int8((unsigned char)0xF3); // REP
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xAA); // STOSB
}
3489 
// fills rcx pointer-sized words at [edi] with the value in rax
// generic
void Assembler::rep_stos() {
  emit_int8((unsigned char)0xF3); // REP
  LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
  emit_int8((unsigned char)0xAB);
}
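
// Illustrative fill sequence (the register assignments are fixed by the
// ISA; the surrounding moves are hypothetical MacroAssembler code):
//   __ movptr(rdi, to);     // destination
//   __ xorptr(rax, rax);    // value to store
//   __ movptr(rcx, count);  // number of pointer-sized words
//   __ rep_stos();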
3497 
// scans rcx pointer-sized words at [edi] for an occurrence of rax
// generic
void Assembler::repne_scan() {
  emit_int8((unsigned char)0xF2); // REPNE
  LP64_ONLY(prefix(REX_W));       // LP64:SCASQ, LP32:SCASD
  emit_int8((unsigned char)0xAF);
}
3506 
3507 #ifdef _LP64
// scans rcx 4-byte words at [edi] for an occurrence of rax
// generic
void Assembler::repne_scanl() {
  emit_int8((unsigned char)0xF2); // REPNE
  emit_int8((unsigned char)0xAF); // SCASL
}
3515 #endif
3516 
3517 void Assembler::ret(int imm16) {
3518   if (imm16 == 0) {
3519     emit_int8((unsigned char)0xC3);
3520   } else {
3521     emit_int8((unsigned char)0xC2);
3522     emit_int16(imm16);
3523   }
3524 }
3525 
3526 void Assembler::sahf() {
3527 #ifdef _LP64
3528   // Not supported in 64bit mode
3529   ShouldNotReachHere();
3530 #endif
3531   emit_int8((unsigned char)0x9E);
3532 }
3533 
void Assembler::sarl(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
3537   if (imm8 == 1) {
3538     emit_int8((unsigned char)0xD1);
3539     emit_int8((unsigned char)(0xF8 | encode));
3540   } else {
3541     emit_int8((unsigned char)0xC1);
3542     emit_int8((unsigned char)(0xF8 | encode));
3543     emit_int8(imm8);
3544   }
3545 }
3546 
3547 void Assembler::sarl(Register dst) {
3548   int encode = prefix_and_encode(dst->encoding());
3549   emit_int8((unsigned char)0xD3);
3550   emit_int8((unsigned char)(0xF8 | encode));
3551 }
3552 
3553 void Assembler::sbbl(Address dst, int32_t imm32) {
3554   InstructionMark im(this);
3555   prefix(dst);
3556   emit_arith_operand(0x81, rbx, dst, imm32);
3557 }
3558 
3559 void Assembler::sbbl(Register dst, int32_t imm32) {
3560   prefix(dst);
3561   emit_arith(0x81, 0xD8, dst, imm32);
3562 }
3563 
3564 
3565 void Assembler::sbbl(Register dst, Address src) {
3566   InstructionMark im(this);
3567   prefix(src, dst);
3568   emit_int8(0x1B);
3569   emit_operand(dst, src);
3570 }
3571 
3572 void Assembler::sbbl(Register dst, Register src) {
3573   (void) prefix_and_encode(dst->encoding(), src->encoding());
3574   emit_arith(0x1B, 0xC0, dst, src);
3575 }
3576 
3577 void Assembler::setb(Condition cc, Register dst) {
3578   assert(0 <= cc && cc < 16, "illegal cc");
3579   int encode = prefix_and_encode(dst->encoding(), true);
3580   emit_int8(0x0F);
  emit_int8((unsigned char)(0x90 | cc));
3582   emit_int8((unsigned char)(0xC0 | encode));
3583 }
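
// Typical idiom for materializing a flag as 0/1; the movzbl clears the
// upper bits that setb leaves untouched:
//   __ cmpl(rax, rbx);
//   __ setb(Assembler::less, rcx);
//   __ movzbl(rcx, rcx);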
3584 
3585 void Assembler::shll(Register dst, int imm8) {
3586   assert(isShiftCount(imm8), "illegal shift count");
3587   int encode = prefix_and_encode(dst->encoding());
3588   if (imm8 == 1 ) {
3589     emit_int8((unsigned char)0xD1);
3590     emit_int8((unsigned char)(0xE0 | encode));
3591   } else {
3592     emit_int8((unsigned char)0xC1);
3593     emit_int8((unsigned char)(0xE0 | encode));
3594     emit_int8(imm8);
3595   }
3596 }
3597 
3598 void Assembler::shll(Register dst) {
3599   int encode = prefix_and_encode(dst->encoding());
3600   emit_int8((unsigned char)0xD3);
3601   emit_int8((unsigned char)(0xE0 | encode));
3602 }
3603 
3604 void Assembler::shrl(Register dst, int imm8) {
3605   assert(isShiftCount(imm8), "illegal shift count");
3606   int encode = prefix_and_encode(dst->encoding());
3607   emit_int8((unsigned char)0xC1);
3608   emit_int8((unsigned char)(0xE8 | encode));
3609   emit_int8(imm8);
3610 }
3611 
3612 void Assembler::shrl(Register dst) {
3613   int encode = prefix_and_encode(dst->encoding());
3614   emit_int8((unsigned char)0xD3);
3615   emit_int8((unsigned char)(0xE8 | encode));
3616 }
3617 
3618 // copies a single word from [esi] to [edi]
3619 void Assembler::smovl() {
3620   emit_int8((unsigned char)0xA5);
3621 }
3622 
3623 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
3624   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3625   if (VM_Version::supports_evex()) {
3626     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3627   } else {
3628     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3629   }
3630 }
3631 
3632 void Assembler::sqrtsd(XMMRegister dst, Address src) {
3633   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3634   if (VM_Version::supports_evex()) {
3635     _tuple_type = EVEX_T1S;
3636     _input_size_in_bits = EVEX_64bit;
3637     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3638   } else {
3639     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3640   }
3641 }
3642 
3643 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
3644   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3645   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
3646 }
3647 
void Assembler::sqrtss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_32bit;
  }
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
}

void Assembler::std() {
  emit_int8((unsigned char)0xFD);
}
3660 
void Assembler::stmxcsr(Address dst) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(3), dst); // 0F AE /3 = stmxcsr
}
3669 
3670 void Assembler::subl(Address dst, int32_t imm32) {
3671   InstructionMark im(this);
3672   prefix(dst);
3673   emit_arith_operand(0x81, rbp, dst, imm32);
3674 }
3675 
3676 void Assembler::subl(Address dst, Register src) {
3677   InstructionMark im(this);
3678   prefix(dst, src);
3679   emit_int8(0x29);
3680   emit_operand(src, dst);
3681 }
3682 
3683 void Assembler::subl(Register dst, int32_t imm32) {
3684   prefix(dst);
3685   emit_arith(0x81, 0xE8, dst, imm32);
3686 }
3687 
// Force generation of a 4-byte immediate value even if it fits into 8 bits
3689 void Assembler::subl_imm32(Register dst, int32_t imm32) {
3690   prefix(dst);
3691   emit_arith_imm32(0x81, 0xE8, dst, imm32);
3692 }
3693 
3694 void Assembler::subl(Register dst, Address src) {
3695   InstructionMark im(this);
3696   prefix(src, dst);
3697   emit_int8(0x2B);
3698   emit_operand(dst, src);
3699 }
3700 
3701 void Assembler::subl(Register dst, Register src) {
3702   (void) prefix_and_encode(dst->encoding(), src->encoding());
3703   emit_arith(0x2B, 0xC0, dst, src);
3704 }
3705 
3706 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
3707   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3708   if (VM_Version::supports_evex()) {
3709     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
3710   } else {
3711     emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
3712   }
3713 }
3714 
void Assembler::subsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_64bit;
    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
  }
}
3727 
3728 void Assembler::subss(XMMRegister dst, XMMRegister src) {
3729   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3730   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3731 }
3732 
3733 void Assembler::subss(XMMRegister dst, Address src) {
3734   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3735   if (VM_Version::supports_evex()) {
3736     _tuple_type = EVEX_T1S;
3737     _input_size_in_bits = EVEX_32bit;
3738   }
3739   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3740 }
3741 
3742 void Assembler::testb(Register dst, int imm8) {
3743   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
3744   (void) prefix_and_encode(dst->encoding(), true);
3745   emit_arith_b(0xF6, 0xC0, dst, imm8);
3746 }
3747 
3748 void Assembler::testl(Register dst, int32_t imm32) {
3749   // not using emit_arith because test
3750   // doesn't support sign-extension of
3751   // 8bit operands
3752   int encode = dst->encoding();
3753   if (encode == 0) {
3754     emit_int8((unsigned char)0xA9);
3755   } else {
3756     encode = prefix_and_encode(encode);
3757     emit_int8((unsigned char)0xF7);
3758     emit_int8((unsigned char)(0xC0 | encode));
3759   }
3760   emit_int32(imm32);
3761 }
3762 
3763 void Assembler::testl(Register dst, Register src) {
3764   (void) prefix_and_encode(dst->encoding(), src->encoding());
3765   emit_arith(0x85, 0xC0, dst, src);
3766 }
3767 
void Assembler::testl(Register dst, Address src) {
3769   InstructionMark im(this);
3770   prefix(src, dst);
3771   emit_int8((unsigned char)0x85);
3772   emit_operand(dst, src);
3773 }
3774 
3775 void Assembler::tzcntl(Register dst, Register src) {
3776   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3777   emit_int8((unsigned char)0xF3);
3778   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3779   emit_int8(0x0F);
3780   emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
3782 }
3783 
3784 void Assembler::tzcntq(Register dst, Register src) {
3785   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3786   emit_int8((unsigned char)0xF3);
3787   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3788   emit_int8(0x0F);
3789   emit_int8((unsigned char)0xBC);
3790   emit_int8((unsigned char)(0xC0 | encode));
3791 }
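
// Note: without the supports_bmi1() guards the F3-prefixed encodings above
// would silently decode as REP BSF on older CPUs, which leaves dst
// undefined for a zero src instead of returning the operand width.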
3792 
3793 void Assembler::ucomisd(XMMRegister dst, Address src) {
3794   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3795   if (VM_Version::supports_evex()) {
3796     _tuple_type = EVEX_T1S;
3797     _input_size_in_bits = EVEX_64bit;
3798     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
3799   } else {
3800     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3801   }
3802 }
3803 
3804 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
3805   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3806   if (VM_Version::supports_evex()) {
3807     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
3808   } else {
3809     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3810   }
3811 }
3812 
3813 void Assembler::ucomiss(XMMRegister dst, Address src) {
3814   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3815   if (VM_Version::supports_evex()) {
3816     _tuple_type = EVEX_T1S;
3817     _input_size_in_bits = EVEX_32bit;
3818   }
3819   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
3820 }
3821 
3822 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
3823   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3824   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
3825 }
3826 
3827 void Assembler::xabort(int8_t imm8) {
3828   emit_int8((unsigned char)0xC6);
3829   emit_int8((unsigned char)0xF8);
3830   emit_int8((unsigned char)(imm8 & 0xFF));
3831 }
3832 
3833 void Assembler::xaddl(Address dst, Register src) {
3834   InstructionMark im(this);
3835   prefix(dst, src);
3836   emit_int8(0x0F);
3837   emit_int8((unsigned char)0xC1);
3838   emit_operand(src, dst);
3839 }
3840 
3841 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
3842   InstructionMark im(this);
3843   relocate(rtype);
3844   if (abort.is_bound()) {
3845     address entry = target(abort);
3846     assert(entry != NULL, "abort entry NULL");
3847     intptr_t offset = entry - pc();
3848     emit_int8((unsigned char)0xC7);
3849     emit_int8((unsigned char)0xF8);
    emit_int32(offset - 6); // 2 opcode bytes + 4 displacement bytes
3851   } else {
3852     abort.add_patch_at(code(), locator());
3853     emit_int8((unsigned char)0xC7);
3854     emit_int8((unsigned char)0xF8);
3855     emit_int32(0);
3856   }
3857 }
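
// XBEGIN starts an RTM transaction; on abort the CPU resumes at the target
// of the rel32 displacement with the abort status in eax, so the label is
// patched exactly like a 32-bit branch target when it is still unbound.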
3858 
void Assembler::xchgl(Register dst, Address src) {
3860   InstructionMark im(this);
3861   prefix(src, dst);
3862   emit_int8((unsigned char)0x87);
3863   emit_operand(dst, src);
3864 }
3865 
3866 void Assembler::xchgl(Register dst, Register src) {
3867   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3868   emit_int8((unsigned char)0x87);
3869   emit_int8((unsigned char)(0xC0 | encode));
3870 }
3871 
3872 void Assembler::xend() {
3873   emit_int8((unsigned char)0x0F);
3874   emit_int8((unsigned char)0x01);
3875   emit_int8((unsigned char)0xD5);
3876 }
3877 
3878 void Assembler::xgetbv() {
3879   emit_int8(0x0F);
3880   emit_int8(0x01);
3881   emit_int8((unsigned char)0xD0);
3882 }
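
// XGETBV reads the extended control register selected by ecx into edx:eax;
// it is typically executed with ecx == 0 (XCR0) to find out which SIMD
// state components the OS has enabled before trusting the AVX cpuid bits.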
3883 
3884 void Assembler::xorl(Register dst, int32_t imm32) {
3885   prefix(dst);
3886   emit_arith(0x81, 0xF0, dst, imm32);
3887 }
3888 
3889 void Assembler::xorl(Register dst, Address src) {
3890   InstructionMark im(this);
3891   prefix(src, dst);
3892   emit_int8(0x33);
3893   emit_operand(dst, src);
3894 }
3895 
3896 void Assembler::xorl(Register dst, Register src) {
3897   (void) prefix_and_encode(dst->encoding(), src->encoding());
3898   emit_arith(0x33, 0xC0, dst, src);
3899 }
3900 
3901 
// AVX three-operand scalar floating-point arithmetic instructions
3903 
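// In the three-operand forms below dst = nds op src; for these scalar
// operations the upper bits of dst come from nds (the first source), not
// from the old contents of dst, per the VEX/EVEX encoding rules.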
3904 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
3905   assert(VM_Version::supports_avx(), "");
3906   if (VM_Version::supports_evex()) {
3907     _tuple_type = EVEX_T1S;
3908     _input_size_in_bits = EVEX_64bit;
3909     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3910   } else {
3911     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3912   }
3913 }
3914 
3915 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3916   assert(VM_Version::supports_avx(), "");
3917   if (VM_Version::supports_evex()) {
3918     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3919   } else {
3920     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3921   }
3922 }
3923 
3924 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
3925   assert(VM_Version::supports_avx(), "");
3926   if (VM_Version::supports_evex()) {
3927     _tuple_type = EVEX_T1S;
3928     _input_size_in_bits = EVEX_32bit;
3929   }
3930   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3931 }
3932 
3933 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3934   assert(VM_Version::supports_avx(), "");
3935   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3936 }
3937 
3938 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
3939   assert(VM_Version::supports_avx(), "");
3940   if (VM_Version::supports_evex()) {
3941     _tuple_type = EVEX_T1S;
3942     _input_size_in_bits = EVEX_64bit;
3943     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3944   } else {
3945     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3946   }
3947 }
3948 
3949 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3950   assert(VM_Version::supports_avx(), "");
3951   if (VM_Version::supports_evex()) {
3952     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3953   } else {
3954     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3955   }
3956 }
3957 
3958 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
3959   assert(VM_Version::supports_avx(), "");
3960   if (VM_Version::supports_evex()) {
3961     _tuple_type = EVEX_T1S;
3962     _input_size_in_bits = EVEX_32bit;
3963   }
3964   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3965 }
3966 
3967 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3968   assert(VM_Version::supports_avx(), "");
3969   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3970 }
3971 
3972 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
3973   assert(VM_Version::supports_avx(), "");
3974   if (VM_Version::supports_evex()) {
3975     _tuple_type = EVEX_T1S;
3976     _input_size_in_bits = EVEX_64bit;
3977     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3978   } else {
3979     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3980   }
3981 }
3982 
3983 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3984   assert(VM_Version::supports_avx(), "");
3985   if (VM_Version::supports_evex()) {
3986     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3987   } else {
3988     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3989   }
3990 }
3991 
3992 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
3993   assert(VM_Version::supports_avx(), "");
3994   if (VM_Version::supports_evex()) {
3995     _tuple_type = EVEX_T1S;
3996     _input_size_in_bits = EVEX_32bit;
3997   }
3998   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3999 }
4000 
4001 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4002   assert(VM_Version::supports_avx(), "");
4003   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
4004 }
4005 
4006 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
4007   assert(VM_Version::supports_avx(), "");
4008   if (VM_Version::supports_evex()) {
4009     _tuple_type = EVEX_T1S;
4010     _input_size_in_bits = EVEX_64bit;
4011     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
4012   } else {
4013     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
4014   }
4015 }
4016 
4017 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4018   assert(VM_Version::supports_avx(), "");
4019   if (VM_Version::supports_evex()) {
4020     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
4021   } else {
4022     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
4023   }
4024 }
4025 
4026 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
4027   assert(VM_Version::supports_avx(), "");
4028   if (VM_Version::supports_evex()) {
4029     _tuple_type = EVEX_T1S;
4030     _input_size_in_bits = EVEX_32bit;
4031   }
4032   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
4033 }
4034 
4035 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4036   assert(VM_Version::supports_avx(), "");
4037   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
4038 }
4039 
4040 //====================VECTOR ARITHMETIC=====================================
4041 
// Floating-point vector arithmetic
4043 
4044 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
4045   _instruction_uses_vl = true;
4046   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4047   if (VM_Version::supports_evex()) {
4048     emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66);
4049   } else {
4050     emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
4051   }
4052 }
4053 
4054 void Assembler::addps(XMMRegister dst, XMMRegister src) {
4055   _instruction_uses_vl = true;
4056   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4057   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
4058 }
4059 
4060 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4061   _instruction_uses_vl = true;
4062   assert(VM_Version::supports_avx(), "");
4063   if (VM_Version::supports_evex()) {
4064     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4065   } else {
4066     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4067   }
4068 }
4069 
4070 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4071   _instruction_uses_vl = true;
4072   assert(VM_Version::supports_avx(), "");
4073   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
4074 }
4075 
4076 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4077   _instruction_uses_vl = true;
4078   assert(VM_Version::supports_avx(), "");
4079   if (VM_Version::supports_evex()) {
4080     _tuple_type = EVEX_FV;
4081     _input_size_in_bits = EVEX_64bit;
4082     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4083   } else {
4084     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4085   }
4086 }
4087 
4088 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4089   _instruction_uses_vl = true;
4090   assert(VM_Version::supports_avx(), "");
4091   if (VM_Version::supports_evex()) {
4092     _tuple_type = EVEX_FV;
4093     _input_size_in_bits = EVEX_32bit;
4094   }
4095   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
4096 }
4097 
4098 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
4099   _instruction_uses_vl = true;
4100   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4101   if (VM_Version::supports_evex()) {
4102     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66);
4103   } else {
4104     emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
4105   }
4106 }
4107 
4108 void Assembler::subps(XMMRegister dst, XMMRegister src) {
4109   _instruction_uses_vl = true;
4110   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4111   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
4112 }
4113 
4114 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4115   _instruction_uses_vl = true;
4116   assert(VM_Version::supports_avx(), "");
4117   if (VM_Version::supports_evex()) {
4118     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4119   } else {
4120     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4121   }
4122 }
4123 
4124 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4125   _instruction_uses_vl = true;
4126   assert(VM_Version::supports_avx(), "");
4127   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
4128 }
4129 
4130 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4131   _instruction_uses_vl = true;
4132   assert(VM_Version::supports_avx(), "");
4133   if (VM_Version::supports_evex()) {
4134     _tuple_type = EVEX_FV;
4135     _input_size_in_bits = EVEX_64bit;
4136     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4137   } else {
4138     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4139   }
4140 }
4141 
4142 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4143   _instruction_uses_vl = true;
4144   assert(VM_Version::supports_avx(), "");
4145   if (VM_Version::supports_evex()) {
4146     _tuple_type = EVEX_FV;
4147     _input_size_in_bits = EVEX_32bit;
4148   }
4149   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
4150 }
4151 
4152 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
4153   _instruction_uses_vl = true;
4154   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4155   if (VM_Version::supports_evex()) {
4156     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
4157   } else {
4158     emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
4159   }
4160 }
4161 
4162 void Assembler::mulpd(XMMRegister dst, Address src) {
4163   _instruction_uses_vl = true;
4164   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4165   if (VM_Version::supports_evex()) {
4166     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
4167   } else {
4168     emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
4169   }
4170 }
4171 
4172 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
4173   _instruction_uses_vl = true;
4174   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4175   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
4176 }
4177 
4178 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4179   _instruction_uses_vl = true;
4180   assert(VM_Version::supports_avx(), "");
4181   if (VM_Version::supports_evex()) {
4182     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4183   } else {
4184     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4185   }
4186 }
4187 
4188 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4189   _instruction_uses_vl = true;
4190   assert(VM_Version::supports_avx(), "");
4191   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
4192 }
4193 
4194 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4195   _instruction_uses_vl = true;
4196   assert(VM_Version::supports_avx(), "");
4197   if (VM_Version::supports_evex()) {
4198     _tuple_type = EVEX_FV;
4199     _input_size_in_bits = EVEX_64bit;
4200     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4201   } else {
4202     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4203   }
4204 }
4205 
4206 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4207   _instruction_uses_vl = true;
4208   assert(VM_Version::supports_avx(), "");
4209   if (VM_Version::supports_evex()) {
4210     _tuple_type = EVEX_FV;
4211     _input_size_in_bits = EVEX_32bit;
4212   }
4213   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
4214 }
4215 
4216 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
4217   _instruction_uses_vl = true;
4218   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4219   if (VM_Version::supports_evex()) {
4220     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66);
4221   } else {
4222     emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
4223   }
4224 }
4225 
4226 void Assembler::divps(XMMRegister dst, XMMRegister src) {
4227   _instruction_uses_vl = true;
4228   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4229   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
4230 }
4231 
4232 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4233   _instruction_uses_vl = true;
4234   assert(VM_Version::supports_avx(), "");
4235   if (VM_Version::supports_evex()) {
4236     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4237   } else {
4238     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4239   }
4240 }
4241 
4242 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4243   _instruction_uses_vl = true;
4244   assert(VM_Version::supports_avx(), "");
4245   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
4246 }
4247 
4248 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4249   _instruction_uses_vl = true;
4250   assert(VM_Version::supports_avx(), "");
4251   if (VM_Version::supports_evex()) {
4252     _tuple_type = EVEX_FV;
4253     _input_size_in_bits = EVEX_64bit;
4254     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4255   } else {
4256     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4257   }
4258 }
4259 
4260 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4261   _instruction_uses_vl = true;
4262   assert(VM_Version::supports_avx(), "");
4263   if (VM_Version::supports_evex()) {
4264     _tuple_type = EVEX_FV;
4265     _input_size_in_bits = EVEX_32bit;
4266   }
4267   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
4268 }
4269 
4270 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
4271   _instruction_uses_vl = true;
4272   assert(VM_Version::supports_avx(), "");
4273   if (VM_Version::supports_evex()) {
4274     emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4275   } else {
4276     emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4277   }
4278 }
4279 
4280 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
4281   _instruction_uses_vl = true;
4282   assert(VM_Version::supports_avx(), "");
4283   if (VM_Version::supports_evex()) {
4284     _tuple_type = EVEX_FV;
4285     _input_size_in_bits = EVEX_64bit;
4286     emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4287   } else {
4288     emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4289   }
4290 }
4291 
4292 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
4293   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4294   if (VM_Version::supports_avx512dq()) {
4295     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
4296   } else {
4297     emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4298   }
4299 }
4300 
4301 void Assembler::andps(XMMRegister dst, XMMRegister src) {
4302   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4303   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4304 }
4305 
4306 void Assembler::andps(XMMRegister dst, Address src) {
4307   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4308   if (VM_Version::supports_evex()) {
4309     _tuple_type = EVEX_FV;
4310     _input_size_in_bits = EVEX_32bit;
4311   }
4312   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4313 }
4314 
4315 void Assembler::andpd(XMMRegister dst, Address src) {
4316   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4317   if (VM_Version::supports_avx512dq()) {
4318     _tuple_type = EVEX_FV;
4319     _input_size_in_bits = EVEX_64bit;
4320     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
4321   } else {
4322     emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4323   }
4324 }
4325 
4326 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4327   assert(VM_Version::supports_avx(), "");
4328   if (VM_Version::supports_avx512dq()) {
4329     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
4330   } else {
4331     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4332   }
4333 }
4334 
4335 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4336   assert(VM_Version::supports_avx(), "");
4337   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false,  /* legacy_mode */ _legacy_mode_dq);
4338 }
4339 
4340 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4341   assert(VM_Version::supports_avx(), "");
4342   if (VM_Version::supports_avx512dq()) {
4343     _tuple_type = EVEX_FV;
4344     _input_size_in_bits = EVEX_64bit;
4345     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
4346   } else {
4347     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4348   }
4349 }
4350 
4351 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4352   assert(VM_Version::supports_avx(), "");
4353   if (VM_Version::supports_evex()) {
4354     _tuple_type = EVEX_FV;
4355     _input_size_in_bits = EVEX_32bit;
4356   }
4357   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4358 }
4359 
4360 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
4361   _instruction_uses_vl = true;
4362   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4363   if (VM_Version::supports_evex()) {
4364     emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66);
4365   } else {
4366     emit_simd_arith(0x15, dst, src, VEX_SIMD_66);
4367   }
4368 }
4369 
4370 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
4371   _instruction_uses_vl = true;
4372   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4373   if (VM_Version::supports_evex()) {
4374     emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66);
4375   } else {
4376     emit_simd_arith(0x14, dst, src, VEX_SIMD_66);
4377   }
4378 }
4379 
4380 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
4381   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4382   if (VM_Version::supports_avx512dq()) {
4383     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
4384   } else {
4385     emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4386   }
4387 }
4388 
4389 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
4390   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4391   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4392 }
4393 
4394 void Assembler::xorpd(XMMRegister dst, Address src) {
4395   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4396   if (VM_Version::supports_avx512dq()) {
4397     _tuple_type = EVEX_FV;
4398     _input_size_in_bits = EVEX_64bit;
4399     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
4400   } else {
4401     emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4402   }
4403 }
4404 
4405 void Assembler::xorps(XMMRegister dst, Address src) {
4406   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4407   if (VM_Version::supports_evex()) {
4408     _tuple_type = EVEX_FV;
4409     _input_size_in_bits = EVEX_32bit;
4410   }
4411   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4412 }
4413 
4414 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4415   assert(VM_Version::supports_avx(), "");
4416   if (VM_Version::supports_avx512dq()) {
4417     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
4418   } else {
4419     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4420   }
4421 }
4422 
4423 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4424   assert(VM_Version::supports_avx(), "");
4425   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4426 }
4427 
4428 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4429   assert(VM_Version::supports_avx(), "");
4430   if (VM_Version::supports_avx512dq()) {
4431     _tuple_type = EVEX_FV;
4432     _input_size_in_bits = EVEX_64bit;
4433     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
4434   } else {
4435     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4436   }
4437 }
4438 
4439 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4440   assert(VM_Version::supports_avx(), "");
4441   if (VM_Version::supports_evex()) {
4442     _tuple_type = EVEX_FV;
4443     _input_size_in_bits = EVEX_32bit;
4444   }
4445   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4446 }
4447 
4448 // Integer vector arithmetic
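// vector_len selects the operating width (AVX_128bit, AVX_256bit or
// AVX_512bit); the assertions below check that 256-bit integer forms are
// only emitted on AVX2-capable hardware.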
4449 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == 0)) ||
         VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
4452   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
4453   emit_int8(0x01);
4454   emit_int8((unsigned char)(0xC0 | encode));
4455 }
4456 
4457 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert((VM_Version::supports_avx() && (vector_len == 0)) ||
         VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
4460   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
4461   emit_int8(0x02);
4462   emit_int8((unsigned char)(0xC0 | encode));
4463 }
4464 
4465 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
4466   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4467   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4468 }
4469 
4470 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
4471   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4472   emit_simd_arith(0xFD, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4473 }
4474 
4475 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
4476   _instruction_uses_vl = true;
4477   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4478   emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
4479 }
4480 
4481 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
4482   _instruction_uses_vl = true;
4483   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4484   if (VM_Version::supports_evex()) {
4485     emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66);
4486   } else {
4487     emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
4488   }
4489 }
4490 
4491 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
4492   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4493   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
4494                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
4495   emit_int8(0x01);
4496   emit_int8((unsigned char)(0xC0 | encode));
4497 }
4498 
4499 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
4500   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4501   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
4502                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
4503   emit_int8(0x02);
4504   emit_int8((unsigned char)(0xC0 | encode));
4505 }
4506 
4507 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4508   assert(UseAVX > 0, "requires some form of AVX");
4509   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4510 }
4511 
4512 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4513   assert(UseAVX > 0, "requires some form of AVX");
4514   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4515 }
4516 
4517 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4518   _instruction_uses_vl = true;
4519   assert(UseAVX > 0, "requires some form of AVX");
4520   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4521 }
4522 
4523 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4524   _instruction_uses_vl = true;
4525   assert(UseAVX > 0, "requires some form of AVX");
4526   if (VM_Version::supports_evex()) {
4527     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4528   } else {
4529     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4530   }
4531 }
4532 
4533 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4534   assert(UseAVX > 0, "requires some form of AVX");
4535   if (VM_Version::supports_evex()) {
4536     _tuple_type = EVEX_FVM;
4537   }
4538   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4539 }
4540 
4541 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4542   assert(UseAVX > 0, "requires some form of AVX");
4543   if (VM_Version::supports_evex()) {
4544     _tuple_type = EVEX_FVM;
4545   }
4546   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4547 }
4548 
4549 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4550   _instruction_uses_vl = true;
4551   assert(UseAVX > 0, "requires some form of AVX");
4552   if (VM_Version::supports_evex()) {
4553     _tuple_type = EVEX_FV;
4554     _input_size_in_bits = EVEX_32bit;
4555   }
4556   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4557 }
4558 
4559 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4560   _instruction_uses_vl = true;
4561   assert(UseAVX > 0, "requires some form of AVX");
4562   if (VM_Version::supports_evex()) {
4563     _tuple_type = EVEX_FV;
4564     _input_size_in_bits = EVEX_64bit;
4565     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4566   } else {
4567     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4568   }
4569 }
4570 
4571 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
4572   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4573   emit_simd_arith(0xF8, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4574 }
4575 
4576 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
4577   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4578   emit_simd_arith(0xF9, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4579 }
4580 
4581 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
4582   _instruction_uses_vl = true;
4583   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4584   emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
4585 }
4586 
4587 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
4588   _instruction_uses_vl = true;
4589   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4590   if (VM_Version::supports_evex()) {
4591     emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66);
4592   } else {
4593     emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
4594   }
4595 }
4596 
4597 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4598   assert(UseAVX > 0, "requires some form of AVX");
4599   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4600 }
4601 
4602 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4603   assert(UseAVX > 0, "requires some form of AVX");
4604   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4605 }
4606 
4607 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4608   _instruction_uses_vl = true;
4609   assert(UseAVX > 0, "requires some form of AVX");
4610   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4611 }
4612 
4613 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4614   _instruction_uses_vl = true;
4615   assert(UseAVX > 0, "requires some form of AVX");
4616   if (VM_Version::supports_evex()) {
4617     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4618   } else {
4619     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4620   }
4621 }
4622 
4623 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4624   assert(UseAVX > 0, "requires some form of AVX");
4625   if (VM_Version::supports_evex()) {
4626     _tuple_type = EVEX_FVM;
4627   }
4628   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4629 }
4630 
4631 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4632   assert(UseAVX > 0, "requires some form of AVX");
4633   if (VM_Version::supports_evex()) {
4634     _tuple_type = EVEX_FVM;
4635   }
4636   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4637 }
4638 
4639 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4640   _instruction_uses_vl = true;
4641   assert(UseAVX > 0, "requires some form of AVX");
4642   if (VM_Version::supports_evex()) {
4643     _tuple_type = EVEX_FV;
4644     _input_size_in_bits = EVEX_32bit;
4645   }
4646   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4647 }
4648 
4649 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4650   _instruction_uses_vl = true;
4651   assert(UseAVX > 0, "requires some form of AVX");
4652   if (VM_Version::supports_evex()) {
4653     _tuple_type = EVEX_FV;
4654     _input_size_in_bits = EVEX_64bit;
4655     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4656   } else {
4657     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4658   }
4659 }
4660 
4661 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
4662   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4663   emit_simd_arith(0xD5, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4664 }
4665 
4666 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
4667   _instruction_uses_vl = true;
4668   assert(VM_Version::supports_sse4_1(), "");
4669   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66,
4670                                       /* no_mask_reg */ false, VEX_OPCODE_0F_38);
4671   emit_int8(0x40);
4672   emit_int8((unsigned char)(0xC0 | encode));
4673 }
4674 
4675 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4676   assert(UseAVX > 0, "requires some form of AVX");
4677   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4678 }
4679 
4680 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4681   _instruction_uses_vl = true;
4682   assert(UseAVX > 0, "requires some form of AVX");
4683   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
4684   emit_int8(0x40);
4685   emit_int8((unsigned char)(0xC0 | encode));
4686 }
4687 
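     // VPMULLQ is an AVX-512DQ instruction (EVEX.66.0F38.W1 40 /r), hence the
     // UseAVX > 2 requirement and the DQ legacy-mode gate below.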
4688 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4689   assert(UseAVX > 2, "requires some form of EVEX");
4690   int src_enc = src->encoding();
4691   int dst_enc = dst->encoding();
4692   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4693   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38,
4694                                      /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false);
4695   emit_int8(0x40);
4696   emit_int8((unsigned char)(0xC0 | encode));
4697 }
4698 
4699 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4700   assert(UseAVX > 0, "requires some form of AVX");
4701   if (VM_Version::supports_evex()) {
4702     _tuple_type = EVEX_FVM;
4703   }
4704   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4705 }
4706 
4707 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4708   _instruction_uses_vl = true;
4709   assert(UseAVX > 0, "requires some form of AVX");
4710   if (VM_Version::supports_evex()) {
4711     _tuple_type = EVEX_FV;
4712     _input_size_in_bits = EVEX_32bit;
4713   }
4714   InstructionMark im(this);
4715   int dst_enc = dst->encoding();
4716   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4717   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
4718              VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
4719   emit_int8(0x40);
4720   emit_operand(dst, src);
4721 }
4722 
4723 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4724   assert(UseAVX > 2, "requires some form of EVEX");
4725   if (VM_Version::supports_evex()) {
4726     _tuple_type = EVEX_FV;
4727     _input_size_in_bits = EVEX_64bit;
4728   }
4729   InstructionMark im(this);
4730   int dst_enc = dst->encoding();
4731   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4732   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
4733              VEX_OPCODE_0F_38, /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq);
4734   emit_int8(0x40);
4735   emit_operand(dst, src);
4736 }
4737 
4738 // Shift packed integers left by specified number of bits.
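     // These immediate forms are group opcodes: the ModRM reg field carries the
     // /N opcode extension, so an xmm register with the matching encoding
     // (e.g. xmm6 for /6) is passed solely to produce that field.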
4739 void Assembler::psllw(XMMRegister dst, int shift) {
4740   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4741   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4742   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F,
4743                                       /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
4744   emit_int8(0x71);
4745   emit_int8((unsigned char)(0xC0 | encode));
4746   emit_int8(shift & 0xFF);
4747 }
4748 
4749 void Assembler::pslld(XMMRegister dst, int shift) {
4750   _instruction_uses_vl = true;
4751   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4752   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4753   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
4754   emit_int8(0x72);
4755   emit_int8((unsigned char)(0xC0 | encode));
4756   emit_int8(shift & 0xFF);
4757 }
4758 
4759 void Assembler::psllq(XMMRegister dst, int shift) {
4760   _instruction_uses_vl = true;
4761   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4762   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4763   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ true);
4764   emit_int8(0x73);
4765   emit_int8((unsigned char)(0xC0 | encode));
4766   emit_int8(shift & 0xFF);
4767 }
4768 
4769 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
4770   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4771   emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4772 }
4773 
4774 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
4775   _instruction_uses_vl = true;
4776   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4777   emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
4778 }
4779 
4780 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
4781   _instruction_uses_vl = true;
4782   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4783   if (VM_Version::supports_evex()) {
4784     emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66);
4785   } else {
4786     emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
4787   }
4788 }
4789 
4790 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4791   assert(UseAVX > 0, "requires some form of AVX");
4792   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4793   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4794   emit_int8(shift & 0xFF);
4795 }
4796 
4797 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4798   _instruction_uses_vl = true;
4799   assert(UseAVX > 0, "requires some form of AVX");
4800   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4801   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len);
4802   emit_int8(shift & 0xFF);
4803 }
4804 
4805 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4806   _instruction_uses_vl = true;
4807   assert(UseAVX > 0, "requires some form of AVX");
4808   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4809   if (VM_Version::supports_evex()) {
4810     emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4811   } else {
4812     emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4813   }
4814   emit_int8(shift & 0xFF);
4815 }
4816 
4817 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4818   assert(UseAVX > 0, "requires some form of AVX");
4819   emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4820 }
4821 
4822 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4823   _instruction_uses_vl = true;
4824   assert(UseAVX > 0, "requires some form of AVX");
4825   emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len);
4826 }
4827 
4828 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4829   _instruction_uses_vl = true;
4830   assert(UseAVX > 0, "requires some form of AVX");
4831   if (VM_Version::supports_evex()) {
4832     emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4833   } else {
4834     emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4835   }
4836 }
4837 
4838 // Shift packed integers logically right by specified number of bits.
4839 void Assembler::psrlw(XMMRegister dst, int shift) {
4840   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4841   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4842   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
4843                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
4844   emit_int8(0x71);
4845   emit_int8((unsigned char)(0xC0 | encode));
4846   emit_int8(shift & 0xFF);
4847 }
4848 
4849 void Assembler::psrld(XMMRegister dst, int shift) {
4850   _instruction_uses_vl = true;
4851   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4852   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4853   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
4854   emit_int8(0x72);
4855   emit_int8((unsigned char)(0xC0 | encode));
4856   emit_int8(shift & 0xFF);
4857 }
4858 
4859 void Assembler::psrlq(XMMRegister dst, int shift) {
4860   _instruction_uses_vl = true;
4861   // Do not confuse this with the SSE2 psrldq instruction, which
4862   // shifts the 128-bit value in an xmm register right by a number of bytes.
4863   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4864   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4865   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
4866                                       VEX_OPCODE_0F, /* rex_w */ VM_Version::supports_evex());
4867   emit_int8(0x73);
4868   emit_int8((unsigned char)(0xC0 | encode));
4869   emit_int8(shift & 0xFF);
4870 }
4871 
4872 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
4873   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4874   emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4875 }
4876 
4877 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
4878   _instruction_uses_vl = true;
4879   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4880   emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
4881 }
4882 
4883 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
4884   _instruction_uses_vl = true;
4885   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4886   if (VM_Version::supports_evex()) {
4887     emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
4888   } else {
4889     emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
4890   }
4891 }
4892 
4893 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4894   assert(UseAVX > 0, "requires some form of AVX");
4895   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4896   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4897   emit_int8(shift & 0xFF);
4898 }
4899 
4900 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4901   _instruction_uses_vl = true;
4902   assert(UseAVX > 0, "requires some form of AVX");
4903   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4904   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
4905   emit_int8(shift & 0xFF);
4906 }
4907 
4908 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4909   _instruction_uses_vl = true;
4910   assert(UseAVX > 0, "requires some form of AVX");
4911   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4912   if (VM_Version::supports_evex()) {
4913     emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4914   } else {
4915     emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4916   }
4917   emit_int8(shift & 0xFF);
4918 }
4919 
4920 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4921   assert(UseAVX > 0, "requires some form of AVX");
4922   emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4923 }
4924 
4925 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4926   _instruction_uses_vl = true;
4927   assert(UseAVX > 0, "requires some form of AVX");
4928   emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
4929 }
4930 
4931 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4932   _instruction_uses_vl = true;
4933   assert(UseAVX > 0, "requires some form of AVX");
4934   if (VM_Version::supports_evex()) {
4935     emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4936   } else {
4937     emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4938   }
4939 }
4940 
4941 // Shift packed integers arithmetically right by specified number of bits.
4942 void Assembler::psraw(XMMRegister dst, int shift) {
4943   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4944   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4945   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
4946                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
4947   emit_int8(0x71);
4948   emit_int8((unsigned char)(0xC0 | encode));
4949   emit_int8(shift & 0xFF);
4950 }
4951 
4952 void Assembler::psrad(XMMRegister dst, int shift) {
4953   _instruction_uses_vl = true;
4954   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4955   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4956   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
4957   emit_int8(0x72);
4958   emit_int8((unsigned char)(0xC0 | encode));
4959   emit_int8(shift & 0xFF);
4960 }
4961 
4962 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
4963   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4964   emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4965 }
4966 
4967 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
4968   _instruction_uses_vl = true;
4969   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4970   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
4971 }
4972 
4973 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4974   assert(UseAVX > 0, "requires some form of AVX");
4975   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4976   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4977   emit_int8(shift & 0xFF);
4978 }
4979 
4980 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4981   _instruction_uses_vl = true;
4982   assert(UseAVX > 0, "requires some form of AVX");
4983   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4984   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
4985   emit_int8(shift & 0xFF);
4986 }
4987 
4988 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4989   assert(UseAVX > 0, "requires some form of AVX");
4990   emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4991 }
4992 
4993 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4994   _instruction_uses_vl = true;
4995   assert(UseAVX > 0, "requires some form of AVX");
4996   emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len);
4997 }
4998 
4999 
5000 // Logical operations on packed integers
5001 void Assembler::pand(XMMRegister dst, XMMRegister src) {
5002   _instruction_uses_vl = true;
5003   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5004   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
5005 }
5006 
5007 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5008   _instruction_uses_vl = true;
5009   assert(UseAVX > 0, "requires some form of AVX");
5010   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
5011 }
5012 
5013 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5014   _instruction_uses_vl = true;
5015   assert(UseAVX > 0, "requires some form of AVX");
5016   if (VM_Version::supports_evex()) {
5017     _tuple_type = EVEX_FV;
5018     _input_size_in_bits = EVEX_32bit;
5019   }
5020   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
5021 }
5022 
5023 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
5024   _instruction_uses_vl = true;
5025   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5026   if (VM_Version::supports_evex()) {
5027     emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66);
5028   } else {
5030     emit_simd_arith(0xDF, dst, src, VEX_SIMD_66);
5031   }
5032 }
5033 
5034 void Assembler::por(XMMRegister dst, XMMRegister src) {
5035   _instruction_uses_vl = true;
5036   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5037   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
5038 }
5039 
5040 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5041   _instruction_uses_vl = true;
5042   assert(UseAVX > 0, "requires some form of AVX");
5043   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
5044 }
5045 
5046 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5047   _instruction_uses_vl = true;
5048   assert(UseAVX > 0, "requires some form of AVX");
5049   if (VM_Version::supports_evex()) {
5050     _tuple_type = EVEX_FV;
5051     _input_size_in_bits = EVEX_32bit;
5052   }
5053   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
5054 }
5055 
5056 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
5057   _instruction_uses_vl = true;
5058   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5059   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
5060 }
5061 
5062 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5063   _instruction_uses_vl = true;
5064   assert(UseAVX > 0, "requires some form of AVX");
5065   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
5066 }
5067 
5068 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5069   _instruction_uses_vl = true;
5070   assert(UseAVX > 0, "requires some form of AVX");
5071   if (VM_Version::supports_evex()) {
5072     _tuple_type = EVEX_FV;
5073     _input_size_in_bits = EVEX_32bit;
5074   }
5075   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
5076 }
5077 
5078 
5079 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5080   assert(VM_Version::supports_avx(), "");
5081   int vector_len = AVX_256bit;
5082   if (VM_Version::supports_evex()) {
5083     vector_len = AVX_512bit;
5084   }
5085   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5086   emit_int8(0x18);
5087   emit_int8((unsigned char)(0xC0 | encode));
5088   // 0x00 - insert into lower 128 bits
5089   // 0x01 - insert into upper 128 bits
5090   emit_int8(0x01);
5091 }
5092 
5093 void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5094   assert(VM_Version::supports_evex(), "");
5095   int vector_len = AVX_512bit;
5096   int src_enc = src->encoding();
5097   int dst_enc = dst->encoding();
5098   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5099   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5100                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5101   emit_int8(0x1A);
5102   emit_int8((unsigned char)(0xC0 | encode));
5103   // 0x00 - insert into lower 256 bits
5104   // 0x01 - insert into upper 256 bits
5105   emit_int8(0x01);
5106 }
5107 
5108 void Assembler::vinsertf64x4h(XMMRegister dst, Address src) {
5109   assert(VM_Version::supports_evex(), "");
5110   _tuple_type = EVEX_T4;
5111   _input_size_in_bits = EVEX_64bit;
5112   InstructionMark im(this);
5113   int vector_len = AVX_512bit;
5114   assert(dst != xnoreg, "sanity");
5115   int dst_enc = dst->encoding();
5116   // swap src<->dst for encoding
5117   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ true, vector_len);
5118   emit_int8(0x1A);
5119   emit_operand(dst, src);
5120   // 0x01 - insert into upper 256 bits
5121   emit_int8(0x01);
5122 }
5123 
5124 void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
5125   assert(VM_Version::supports_evex(), "");
5126   int vector_len = AVX_512bit;
5127   int src_enc = src->encoding();
5128   int dst_enc = dst->encoding();
5129   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5130   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5131                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5132   emit_int8(0x18);
5133   emit_int8((unsigned char)(0xC0 | encode));
5134   // 0x00 - insert into q0 128 bits (0..127)
5135   // 0x01 - insert into q1 128 bits (128..255)
5136   // 0x02 - insert into q2 128 bits (256..383)
5137   // 0x03 - insert into q3 128 bits (384..511)
5138   emit_int8(value & 0x3);
5139 }
5140 
5141 void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) {
5142   assert(VM_Version::supports_evex(), "");
5143   _tuple_type = EVEX_T4;
5144   _input_size_in_bits = EVEX_32bit;
5145   InstructionMark im(this);
5146   int vector_len = AVX_512bit;
5147   assert(dst != xnoreg, "sanity");
5148   int dst_enc = dst->encoding();
5149   // swap src<->dst for encoding
5150   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5151   emit_int8(0x18);
5152   emit_operand(dst, src);
5153   // 0x00 - insert into q0 128 bits (0..127)
5154   // 0x01 - insert into q1 128 bits (128..255)
5155   // 0x02 - insert into q2 128 bits (256..383)
5156   // 0x03 - insert into q3 128 bits (384..511)
5157   emit_int8(value & 0x3);
5158 }
5159 
5160 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
5161   assert(VM_Version::supports_avx(), "");
5162   int vector_len = AVX_256bit;
5163   if (VM_Version::supports_evex()) {
5164     _tuple_type = EVEX_T4;
5165     _input_size_in_bits = EVEX_32bit;
5166     vector_len = AVX_512bit;
5167   }
5168   InstructionMark im(this);
5169   assert(dst != xnoreg, "sanity");
5170   int dst_enc = dst->encoding();
5171   // swap src<->dst for encoding
5172   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5173   emit_int8(0x18);
5174   emit_operand(dst, src);
5175   // 0x01 - insert into upper 128 bits
5176   emit_int8(0x01);
5177 }
5178 
5179 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
5180   assert(VM_Version::supports_avx(), "");
5181   int vector_len = AVX_256bit;
5182   if (VM_Version::supports_evex()) {
5183     vector_len = AVX_512bit;
5184   }
5185   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5186   emit_int8(0x19);
5187   emit_int8((unsigned char)(0xC0 | encode));
5188   // 0x00 - extract from lower 128 bits
5189   // 0x01 - extract from upper 128 bits
5190   emit_int8(0x01);
5191 }
5192 
5193 void Assembler::vextractf128h(Address dst, XMMRegister src) {
5194   assert(VM_Version::supports_avx(), "");
5195   int vector_len = AVX_256bit;
5196   if (VM_Version::supports_evex()) {
5197     _tuple_type = EVEX_T4;
5198     _input_size_in_bits = EVEX_32bit;
5199     vector_len = AVX_512bit;
5200   }
5201   InstructionMark im(this);
5202   assert(src != xnoreg, "sanity");
5203   int src_enc = src->encoding();
5204   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5205   emit_int8(0x19);
5206   emit_operand(src, dst);
5207   // 0x01 - extract from upper 128 bits
5208   emit_int8(0x01);
5209 }
5210 
5211 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5212   assert(VM_Version::supports_avx2(), "");
5213   int vector_len = AVX_256bit;
5214   if (VM_Version::supports_evex()) {
5215     vector_len = AVX_512bit;
5216   }
5217   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5218   emit_int8(0x38);
5219   emit_int8((unsigned char)(0xC0 | encode));
5220   // 0x00 - insert into lower 128 bits
5221   // 0x01 - insert into upper 128 bits
5222   emit_int8(0x01);
5223 }
5224 
5225 void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5226   assert(VM_Version::supports_evex(), "");
5227   int vector_len = AVX_512bit;
5228   int src_enc = src->encoding();
5229   int dst_enc = dst->encoding();
5230   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5231   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5232                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5233   emit_int8(0x38);
5234   emit_int8((unsigned char)(0xC0 | encode));
5235   // 0x00 - insert into lower 256 bits
5236   // 0x01 - insert into upper 256 bits
5237   emit_int8(0x01);
5238 }
5239 
5240 void Assembler::vinserti128h(XMMRegister dst, Address src) {
5241   assert(VM_Version::supports_avx2(), "");
5242   int vector_len = AVX_256bit;
5243   if (VM_Version::supports_evex()) {
5244     _tuple_type = EVEX_T4;
5245     _input_size_in_bits = EVEX_32bit;
5246     vector_len = AVX_512bit;
5247   }
5248   InstructionMark im(this);
5249   assert(dst != xnoreg, "sanity");
5250   int dst_enc = dst->encoding();
5251   // swap src<->dst for encoding
5252   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5253   emit_int8(0x38);
5254   emit_operand(dst, src);
5255   // 0x01 - insert into upper 128 bits
5256   emit_int8(0x01);
5257 }
5258 
5259 void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
5260   assert(VM_Version::supports_avx2(), "");
5261   int vector_len = AVX_256bit;
5262   if (VM_Version::supports_evex()) {
5263     vector_len = AVX_512bit;
5264   }
5265   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5266   emit_int8(0x39);
5267   emit_int8((unsigned char)(0xC0 | encode));
5268   // 0x00 - extract from lower 128 bits
5269   // 0x01 - extract from upper 128 bits
5270   emit_int8(0x01);
5271 }
5272 
5273 void Assembler::vextracti128h(Address dst, XMMRegister src) {
5274   assert(VM_Version::supports_avx2(), "");
5275   int vector_len = AVX_256bit;
5276   if (VM_Version::supports_evex()) {
5277     _tuple_type = EVEX_T4;
5278     _input_size_in_bits = EVEX_32bit;
5279     vector_len = AVX_512bit;
5280   }
5281   InstructionMark im(this);
5282   assert(src != xnoreg, "sanity");
5283   int src_enc = src->encoding();
5284   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5285   emit_int8(0x39);
5286   emit_operand(src, dst);
5287   // 0x01 - extract from upper 128 bits
5288   emit_int8(0x01);
5289 }
5290 
5291 void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) {
5292   assert(VM_Version::supports_evex(), "");
5293   int vector_len = AVX_512bit;
5294   int src_enc = src->encoding();
5295   int dst_enc = dst->encoding();
5296   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5297                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5298   emit_int8(0x3B);
5299   emit_int8((unsigned char)(0xC0 | encode));
5300   // 0x01 - extract from upper 256 bits
5301   emit_int8(0x01);
5302 }
5303 
5304 void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
5305   assert(VM_Version::supports_evex(), "");
5306   int vector_len = AVX_512bit;
5307   int src_enc = src->encoding();
5308   int dst_enc = dst->encoding();
5309   int encode;
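       // With AVX512DQ, emit VEXTRACTI64X2 (EVEX.W1); without it, fall back to
       // the equivalent 128-bit extract encoded with W0 in legacy mode.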
5310   if (VM_Version::supports_avx512dq()) {
5311     encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5312                                    /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5313   } else {
5314     encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5315                                    /* vex_w */ false, vector_len, /* legacy_mode */ true, /* no_mask_reg */ false);
5316   }
5317   emit_int8(0x39);
5318   emit_int8((unsigned char)(0xC0 | encode));
       // 0x00 - extract from bits 127:0
5319   // 0x01 - extract from bits 255:128
5320   // 0x02 - extract from bits 383:256
5321   // 0x03 - extract from bits 511:384
5322   emit_int8(value & 0x3);
5323 }
5324 
5325 void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) {
5326   assert(VM_Version::supports_evex(), "");
5327   int vector_len = AVX_512bit;
5328   int src_enc = src->encoding();
5329   int dst_enc = dst->encoding();
5330   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5331                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5332   emit_int8(0x1B);
5333   emit_int8((unsigned char)(0xC0 | encode));
5334   // 0x01 - extract from upper 256 bits
5335   emit_int8(0x01);
5336 }
5337 
5338 void Assembler::vextractf64x4h(Address dst, XMMRegister src) {
5339   assert(VM_Version::supports_evex(), "");
5340   _tuple_type = EVEX_T4;
5341   _input_size_in_bits = EVEX_64bit;
5342   InstructionMark im(this);
5343   int vector_len = AVX_512bit;
5344   assert(src != xnoreg, "sanity");
5345   int src_enc = src->encoding();
5346   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5347              /* vex_w */ true, vector_len);
5348   emit_int8(0x1B);
5349   emit_operand(src, dst);
5350   // 0x01 - extract from upper 256 bits
5351   emit_int8(0x01);
5352 }
5353 
5354 void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
5355   assert(VM_Version::supports_evex(), "");
5356   int vector_len = AVX_512bit;
5357   int src_enc = src->encoding();
5358   int dst_enc = dst->encoding();
5359   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5360                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5361   emit_int8(0x19);
5362   emit_int8((unsigned char)(0xC0 | encode));
5363   // 0x00 - extract from bits 127:0
5364   // 0x01 - extract from bits 255:128
5365   // 0x02 - extract from bits 383:256
5366   // 0x03 - extract from bits 511:384
5367   emit_int8(value & 0x3);
5368 }
5369 
5370 void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
5371   assert(VM_Version::supports_evex(), "");
5372   _tuple_type = EVEX_T4;
5373   _input_size_in_bits = EVEX_32bit;
5374   InstructionMark im(this);
5375   int vector_len = AVX_512bit;
5376   assert(src != xnoreg, "sanity");
5377   int src_enc = src->encoding();
5378   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5379   emit_int8(0x19);
5380   emit_operand(src, dst);
5381   // 0x00 - extract from bits 127:0
5382   // 0x01 - extract from bits 255:128
5383   // 0x02 - extract from bits 383:256
5384   // 0x03 - extract from bits 511:384
5385   emit_int8(value & 0x3);
5386 }
5387 
5388 void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
5389   assert(VM_Version::supports_evex(), "");
5390   int vector_len = AVX_512bit;
5391   int src_enc = src->encoding();
5392   int dst_enc = dst->encoding();
5393   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5394                                      /* vex_w */ !_legacy_mode_dq, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5395   emit_int8(0x19);
5396   emit_int8((unsigned char)(0xC0 | encode));
       // 0x00 - extract from bits 127:0
5397   // 0x01 - extract from bits 255:128
5398   // 0x02 - extract from bits 383:256
5399   // 0x03 - extract from bits 511:384
5400   emit_int8(value & 0x3);
5401 }
5402 
5403 // duplicate 4-byte integer data from src into 8 locations in dest
5404 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
5405   _instruction_uses_vl = true;
5406   assert(UseAVX > 1, "");
5407   int vector_len = AVX_256bit;
5408   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5409   emit_int8(0x58);
5410   emit_int8((unsigned char)(0xC0 | encode));
5411 }
5412 
5413 // duplicate 2-byte integer data from src into 16 locations in dest
5414 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
5415   assert(VM_Version::supports_avx2(), "");
5416   int vector_len = AVX_256bit;
5417   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
5418                                      vector_len, VEX_OPCODE_0F_38, false);
5419   emit_int8(0x79);
5420   emit_int8((unsigned char)(0xC0 | encode));
5421 }
5422 
5423 // duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL
5424 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
5425   _instruction_uses_vl = true;
5426   assert(UseAVX > 1, "");
5427   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5428   emit_int8(0x78);
5429   emit_int8((unsigned char)(0xC0 | encode));
5430 }
5431 
5432 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
5433   _instruction_uses_vl = true;
5434   assert(UseAVX > 1, "");
5435   _tuple_type = EVEX_T1S;
5436   _input_size_in_bits = EVEX_8bit;
5437   InstructionMark im(this);
5438   assert(dst != xnoreg, "sanity");
5439   int dst_enc = dst->encoding();
5440   // swap src<->dst for encoding
5441   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5442   emit_int8(0x78);
5443   emit_operand(dst, src);
5444 }
5445 
5446 // duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL
5447 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
5448   _instruction_uses_vl = true;
5449   assert(UseAVX > 1, "");
5450   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5451   emit_int8(0x79);
5452   emit_int8((unsigned char)(0xC0 | encode));
5453 }
5454 
5455 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
5456   _instruction_uses_vl = true;
5457   assert(UseAVX > 1, "");
5458   _tuple_type = EVEX_T1S;
5459   _input_size_in_bits = EVEX_16bit;
5460   InstructionMark im(this);
5461   assert(dst != xnoreg, "sanity");
5462   int dst_enc = dst->encoding();
5463   // swap src<->dst for encoding
5464   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5465   emit_int8(0x79);
5466   emit_operand(dst, src);
5467 }
5468 
5469 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
5470 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
5471   _instruction_uses_vl = true;
5472   assert(UseAVX > 1, "");
5473   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5474   emit_int8(0x58);
5475   emit_int8((unsigned char)(0xC0 | encode));
5476 }
5477 
5478 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
5479   _instruction_uses_vl = true;
5480   assert(UseAVX > 1, "");
5481   _tuple_type = EVEX_T1S;
5482   _input_size_in_bits = EVEX_32bit;
5483   InstructionMark im(this);
5484   assert(dst != xnoreg, "sanity");
5485   int dst_enc = dst->encoding();
5486   // swap src<->dst for encoding
5487   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5488   emit_int8(0x58);
5489   emit_operand(dst, src);
5490 }
5491 
5492 // duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL
5493 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
5494   _instruction_uses_vl = true;
5495   assert(UseAVX > 1, "");
5496   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5497                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5498   emit_int8(0x59);
5499   emit_int8((unsigned char)(0xC0 | encode));
5500 }
5501 
5502 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
5503   _instruction_uses_vl = true;
5504   assert(UseAVX > 1, "");
5505   _tuple_type = EVEX_T1S;
5506   _input_size_in_bits = EVEX_64bit;
5507   InstructionMark im(this);
5508   assert(dst != xnoreg, "sanity");
5509   int dst_enc = dst->encoding();
5510   // swap src<->dst for encoding
5511   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
5512   emit_int8(0x59);
5513   emit_operand(dst, src);
5514 }
5515 
5516 // duplicate single-precision fp from src into 4|8|16 locations in dest : requires AVX512VL
5517 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
5518   _instruction_uses_vl = true;
5519   assert(UseAVX > 1, "");
5520   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5521                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5522   emit_int8(0x18);
5523   emit_int8((unsigned char)(0xC0 | encode));
5524 }
5525 
5526 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
5527   assert(UseAVX > 1, "");
5528   _tuple_type = EVEX_T1S;
5529   _input_size_in_bits = EVEX_32bit;
5530   InstructionMark im(this);
5531   assert(dst != xnoreg, "sanity");
5532   int dst_enc = dst->encoding();
5533   // swap src<->dst for encoding
5534   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5535   emit_int8(0x18);
5536   emit_operand(dst, src);
5537 }
5538 
5539 // duplicate double-precision fp from src into 2|4|8 locations in dest : requires AVX512VL
5540 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
5541   _instruction_uses_vl = true;
5542   assert(UseAVX > 1, "");
5543   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5544                                      /*vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5545   emit_int8(0x19);
5546   emit_int8((unsigned char)(0xC0 | encode));
5547 }
5548 
5549 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
5550   _instruction_uses_vl = true;
5551   assert(UseAVX > 1, "");
5552   _tuple_type = EVEX_T1S;
5553   _input_size_in_bits = EVEX_64bit;
5554   InstructionMark im(this);
5555   assert(dst != xnoreg, "sanity");
5556   int dst_enc = dst->encoding();
5557   // swap src<->dst for encoding
5558   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
5559   emit_int8(0x19);
5560   emit_operand(dst, src);
5561 }
5562 
5563 // duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL
5564 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
5565   _instruction_uses_vl = true;
5566   assert(VM_Version::supports_evex(), "");
5567   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5568                                      /*vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5569   emit_int8(0x7A);
5570   emit_int8((unsigned char)(0xC0 | encode));
5571 }
5572 
5573 // duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL
5574 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
5575   _instruction_uses_vl = true;
5576   assert(VM_Version::supports_evex(), "");
5577   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5578                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5579   emit_int8(0x7B);
5580   emit_int8((unsigned char)(0xC0 | encode));
5581 }
5582 
5583 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
5584 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
5585   _instruction_uses_vl = true;
5586   assert(VM_Version::supports_evex(), "");
5587   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5588                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5589   emit_int8(0x7C);
5590   emit_int8((unsigned char)(0xC0 | encode));
5591 }
5592 
5593 // duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL
5594 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
5595   _instruction_uses_vl = true;
5596   assert(VM_Version::supports_evex(), "");
5597   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5598                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5599   emit_int8(0x7C);
5600   emit_int8((unsigned char)(0xC0 | encode));
5601 }
5602 
5603 // Carry-Less Multiplication Quadword
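     // Per the Intel SDM, imm8 bit 0 selects the low or high quadword of the
     // first source and bit 4 selects the quadword of the second source.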
5604 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
5605   assert(VM_Version::supports_clmul(), "");
5606   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
5607                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
5608   emit_int8(0x44);
5609   emit_int8((unsigned char)(0xC0 | encode));
5610   emit_int8((unsigned char)mask);
5611 }
5612 
5613 // Carry-Less Multiplication Quadword
5614 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
5615   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
5616   int vector_len = AVX_128bit;
5617   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* legacy_mode */ true);
5618   emit_int8(0x44);
5619   emit_int8((unsigned char)(0xC0 | encode));
5620   emit_int8((unsigned char)mask);
5621 }
5622 
5623 void Assembler::vzeroupper() {
5624   assert(VM_Version::supports_avx(), "");
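       // vzeroupper clears the upper bits of the ymm registers, avoiding
       // AVX<->SSE transition penalties; it is not emitted on EVEX-capable
       // targets (UseAVX > 2).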
5625   if (UseAVX < 3) {
5627     (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
5628     emit_int8(0x77);
5629   }
5630 }
5631 
5632 
5633 #ifndef _LP64
5634 // 32-bit-only pieces of the assembler
5635 
5636 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
5637   // NO PREFIX AS NEVER 64BIT
5638   InstructionMark im(this);
5639   emit_int8((unsigned char)0x81);
5640   emit_int8((unsigned char)(0xF8 | src1->encoding()));
5641   emit_data(imm32, rspec, 0);
5642 }
5643 
5644 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
5645   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
5646   InstructionMark im(this);
5647   emit_int8((unsigned char)0x81);
5648   emit_operand(rdi, src1);
5649   emit_data(imm32, rspec, 0);
5650 }
5651 
5652 // On 32-bit platforms, the 64-bit cmpxchg compares the value at adr with the contents
5653 // of rdx:rax, and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
5654 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
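     // Note: the lock prefix needed for an atomic update is emitted separately
     // by the caller.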
5655 void Assembler::cmpxchg8(Address adr) {
5656   InstructionMark im(this);
5657   emit_int8(0x0F);
5658   emit_int8((unsigned char)0xC7);
5659   emit_operand(rcx, adr);
5660 }
5661 
5662 void Assembler::decl(Register dst) {
5663   // Don't use it directly. Use MacroAssembler::decrementl() instead.
5664   emit_int8(0x48 | dst->encoding());
5665 }
5666 
5667 #endif // _LP64
5668 
5669 // 64-bit code typically doesn't use the x87, but needs it for the trig functions
5670 
5671 void Assembler::fabs() {
5672   emit_int8((unsigned char)0xD9);
5673   emit_int8((unsigned char)0xE1);
5674 }
5675 
5676 void Assembler::fadd(int i) {
5677   emit_farith(0xD8, 0xC0, i);
5678 }
5679 
5680 void Assembler::fadd_d(Address src) {
5681   InstructionMark im(this);
5682   emit_int8((unsigned char)0xDC);
5683   emit_operand32(rax, src);
5684 }
5685 
5686 void Assembler::fadd_s(Address src) {
5687   InstructionMark im(this);
5688   emit_int8((unsigned char)0xD8);
5689   emit_operand32(rax, src);
5690 }
5691 
5692 void Assembler::fadda(int i) {
5693   emit_farith(0xDC, 0xC0, i);
5694 }
5695 
5696 void Assembler::faddp(int i) {
5697   emit_farith(0xDE, 0xC0, i);
5698 }
5699 
5700 void Assembler::fchs() {
5701   emit_int8((unsigned char)0xD9);
5702   emit_int8((unsigned char)0xE0);
5703 }
5704 
5705 void Assembler::fcom(int i) {
5706   emit_farith(0xD8, 0xD0, i);
5707 }
5708 
5709 void Assembler::fcomp(int i) {
5710   emit_farith(0xD8, 0xD8, i);
5711 }
5712 
5713 void Assembler::fcomp_d(Address src) {
5714   InstructionMark im(this);
5715   emit_int8((unsigned char)0xDC);
5716   emit_operand32(rbx, src);
5717 }
5718 
5719 void Assembler::fcomp_s(Address src) {
5720   InstructionMark im(this);
5721   emit_int8((unsigned char)0xD8);
5722   emit_operand32(rbx, src);
5723 }
5724 
5725 void Assembler::fcompp() {
5726   emit_int8((unsigned char)0xDE);
5727   emit_int8((unsigned char)0xD9);
5728 }
5729 
5730 void Assembler::fcos() {
5731   emit_int8((unsigned char)0xD9);
5732   emit_int8((unsigned char)0xFF);
5733 }
5734 
5735 void Assembler::fdecstp() {
5736   emit_int8((unsigned char)0xD9);
5737   emit_int8((unsigned char)0xF6);
5738 }
5739 
5740 void Assembler::fdiv(int i) {
5741   emit_farith(0xD8, 0xF0, i);
5742 }
5743 
5744 void Assembler::fdiv_d(Address src) {
5745   InstructionMark im(this);
5746   emit_int8((unsigned char)0xDC);
5747   emit_operand32(rsi, src);
5748 }
5749 
5750 void Assembler::fdiv_s(Address src) {
5751   InstructionMark im(this);
5752   emit_int8((unsigned char)0xD8);
5753   emit_operand32(rsi, src);
5754 }
5755 
5756 void Assembler::fdiva(int i) {
5757   emit_farith(0xDC, 0xF8, i);
5758 }
5759 
5760 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
5761 //       is erroneous for some of the floating-point instructions below.
5762 
5763 void Assembler::fdivp(int i) {
5764   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
5765 }
5766 
5767 void Assembler::fdivr(int i) {
5768   emit_farith(0xD8, 0xF8, i);
5769 }
5770 
5771 void Assembler::fdivr_d(Address src) {
5772   InstructionMark im(this);
5773   emit_int8((unsigned char)0xDC);
5774   emit_operand32(rdi, src);
5775 }
5776 
5777 void Assembler::fdivr_s(Address src) {
5778   InstructionMark im(this);
5779   emit_int8((unsigned char)0xD8);
5780   emit_operand32(rdi, src);
5781 }
5782 
5783 void Assembler::fdivra(int i) {
5784   emit_farith(0xDC, 0xF0, i);
5785 }
5786 
5787 void Assembler::fdivrp(int i) {
5788   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
5789 }
5790 
5791 void Assembler::ffree(int i) {
5792   emit_farith(0xDD, 0xC0, i);
5793 }
5794 
5795 void Assembler::fild_d(Address adr) {
5796   InstructionMark im(this);
5797   emit_int8((unsigned char)0xDF);
5798   emit_operand32(rbp, adr);
5799 }
5800 
5801 void Assembler::fild_s(Address adr) {
5802   InstructionMark im(this);
5803   emit_int8((unsigned char)0xDB);
5804   emit_operand32(rax, adr);
5805 }
5806 
5807 void Assembler::fincstp() {
5808   emit_int8((unsigned char)0xD9);
5809   emit_int8((unsigned char)0xF7);
5810 }
5811 
5812 void Assembler::finit() {
5813   emit_int8((unsigned char)0x9B);
5814   emit_int8((unsigned char)0xDB);
5815   emit_int8((unsigned char)0xE3);
5816 }
5817 
5818 void Assembler::fist_s(Address adr) {
5819   InstructionMark im(this);
5820   emit_int8((unsigned char)0xDB);
5821   emit_operand32(rdx, adr);
5822 }
5823 
5824 void Assembler::fistp_d(Address adr) {
5825   InstructionMark im(this);
5826   emit_int8((unsigned char)0xDF);
5827   emit_operand32(rdi, adr);
5828 }
5829 
5830 void Assembler::fistp_s(Address adr) {
5831   InstructionMark im(this);
5832   emit_int8((unsigned char)0xDB);
5833   emit_operand32(rbx, adr);
5834 }
5835 
5836 void Assembler::fld1() {
5837   emit_int8((unsigned char)0xD9);
5838   emit_int8((unsigned char)0xE8);
5839 }
5840 
5841 void Assembler::fld_d(Address adr) {
5842   InstructionMark im(this);
5843   emit_int8((unsigned char)0xDD);
5844   emit_operand32(rax, adr);
5845 }
5846 
5847 void Assembler::fld_s(Address adr) {
5848   InstructionMark im(this);
5849   emit_int8((unsigned char)0xD9);
5850   emit_operand32(rax, adr);
5851 }
5852 
5853 
5854 void Assembler::fld_s(int index) {
5855   emit_farith(0xD9, 0xC0, index);
5856 }
5857 
5858 void Assembler::fld_x(Address adr) {
5859   InstructionMark im(this);
5860   emit_int8((unsigned char)0xDB);
5861   emit_operand32(rbp, adr);
5862 }
5863 
5864 void Assembler::fldcw(Address src) {
5865   InstructionMark im(this);
5866   emit_int8((unsigned char)0xD9);
5867   emit_operand32(rbp, src);
5868 }
5869 
5870 void Assembler::fldenv(Address src) {
5871   InstructionMark im(this);
5872   emit_int8((unsigned char)0xD9);
5873   emit_operand32(rsp, src);
5874 }
5875 
5876 void Assembler::fldlg2() {
5877   emit_int8((unsigned char)0xD9);
5878   emit_int8((unsigned char)0xEC);
5879 }
5880 
5881 void Assembler::fldln2() {
5882   emit_int8((unsigned char)0xD9);
5883   emit_int8((unsigned char)0xED);
5884 }
5885 
5886 void Assembler::fldz() {
5887   emit_int8((unsigned char)0xD9);
5888   emit_int8((unsigned char)0xEE);
5889 }
5890 
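     // ln(x) is computed as ln(2) * log2(x): push ln(2), swap so x is in ST(0),
     // then fyl2x computes ST(1) * log2(ST(0)) and pops the stack.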
5891 void Assembler::flog() {
5892   fldln2();
5893   fxch();
5894   fyl2x();
5895 }
5896 
5897 void Assembler::flog10() {
5898   fldlg2();
5899   fxch();
5900   fyl2x();
5901 }
5902 
5903 void Assembler::fmul(int i) {
5904   emit_farith(0xD8, 0xC8, i);
5905 }
5906 
5907 void Assembler::fmul_d(Address src) {
5908   InstructionMark im(this);
5909   emit_int8((unsigned char)0xDC);
5910   emit_operand32(rcx, src);
5911 }
5912 
5913 void Assembler::fmul_s(Address src) {
5914   InstructionMark im(this);
5915   emit_int8((unsigned char)0xD8);
5916   emit_operand32(rcx, src);
5917 }
5918 
5919 void Assembler::fmula(int i) {
5920   emit_farith(0xDC, 0xC8, i);
5921 }
5922 
5923 void Assembler::fmulp(int i) {
5924   emit_farith(0xDE, 0xC8, i);
5925 }
5926 
5927 void Assembler::fnsave(Address dst) {
5928   InstructionMark im(this);
5929   emit_int8((unsigned char)0xDD);
5930   emit_operand32(rsi, dst);
5931 }
5932 
5933 void Assembler::fnstcw(Address src) {
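       // The leading 0x9B (fwait) prefix makes this the waiting form, fstcw.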
5934   InstructionMark im(this);
5935   emit_int8((unsigned char)0x9B);
5936   emit_int8((unsigned char)0xD9);
5937   emit_operand32(rdi, src);
5938 }
5939 
5940 void Assembler::fnstsw_ax() {
5941   emit_int8((unsigned char)0xDF);
5942   emit_int8((unsigned char)0xE0);
5943 }
5944 
5945 void Assembler::fprem() {
5946   emit_int8((unsigned char)0xD9);
5947   emit_int8((unsigned char)0xF8);
5948 }
5949 
5950 void Assembler::fprem1() {
5951   emit_int8((unsigned char)0xD9);
5952   emit_int8((unsigned char)0xF5);
5953 }
5954 
5955 void Assembler::frstor(Address src) {
5956   InstructionMark im(this);
5957   emit_int8((unsigned char)0xDD);
5958   emit_operand32(rsp, src);
5959 }
5960 
5961 void Assembler::fsin() {
5962   emit_int8((unsigned char)0xD9);
5963   emit_int8((unsigned char)0xFE);
5964 }
5965 
5966 void Assembler::fsqrt() {
5967   emit_int8((unsigned char)0xD9);
5968   emit_int8((unsigned char)0xFA);
5969 }
5970 
5971 void Assembler::fst_d(Address adr) {
5972   InstructionMark im(this);
5973   emit_int8((unsigned char)0xDD);
5974   emit_operand32(rdx, adr);
5975 }
5976 
5977 void Assembler::fst_s(Address adr) {
5978   InstructionMark im(this);
5979   emit_int8((unsigned char)0xD9);
5980   emit_operand32(rdx, adr);
5981 }
5982 
5983 void Assembler::fstp_d(Address adr) {
5984   InstructionMark im(this);
5985   emit_int8((unsigned char)0xDD);
5986   emit_operand32(rbx, adr);
5987 }
5988 
5989 void Assembler::fstp_d(int index) {
5990   emit_farith(0xDD, 0xD8, index);
5991 }
5992 
5993 void Assembler::fstp_s(Address adr) {
5994   InstructionMark im(this);
5995   emit_int8((unsigned char)0xD9);
5996   emit_operand32(rbx, adr);
5997 }
5998 
5999 void Assembler::fstp_x(Address adr) {
6000   InstructionMark im(this);
6001   emit_int8((unsigned char)0xDB);
6002   emit_operand32(rdi, adr);
6003 }
6004 
6005 void Assembler::fsub(int i) {
6006   emit_farith(0xD8, 0xE0, i);
6007 }
6008 
6009 void Assembler::fsub_d(Address src) {
6010   InstructionMark im(this);
6011   emit_int8((unsigned char)0xDC);
6012   emit_operand32(rsp, src);
6013 }
6014 
6015 void Assembler::fsub_s(Address src) {
6016   InstructionMark im(this);
6017   emit_int8((unsigned char)0xD8);
6018   emit_operand32(rsp, src);
6019 }
6020 
6021 void Assembler::fsuba(int i) {
6022   emit_farith(0xDC, 0xE8, i);
6023 }
6024 
6025 void Assembler::fsubp(int i) {
6026   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
6027 }
6028 
6029 void Assembler::fsubr(int i) {
6030   emit_farith(0xD8, 0xE8, i);
6031 }
6032 
6033 void Assembler::fsubr_d(Address src) {
6034   InstructionMark im(this);
6035   emit_int8((unsigned char)0xDC);
6036   emit_operand32(rbp, src);
6037 }
6038 
6039 void Assembler::fsubr_s(Address src) {
6040   InstructionMark im(this);
6041   emit_int8((unsigned char)0xD8);
6042   emit_operand32(rbp, src);
6043 }
6044 
6045 void Assembler::fsubra(int i) {
6046   emit_farith(0xDC, 0xE0, i);
6047 }
6048 
6049 void Assembler::fsubrp(int i) {
6050   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
6051 }
6052 
6053 void Assembler::ftan() {
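       // fptan (D9 F2) replaces ST(0) with tan(ST(0)) and pushes 1.0; the
       // following fstp st(0) (DD D8) pops that 1.0 again.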
6054   emit_int8((unsigned char)0xD9);
6055   emit_int8((unsigned char)0xF2);
6056   emit_int8((unsigned char)0xDD);
6057   emit_int8((unsigned char)0xD8);
6058 }
6059 
6060 void Assembler::ftst() {
6061   emit_int8((unsigned char)0xD9);
6062   emit_int8((unsigned char)0xE4);
6063 }
6064 
6065 void Assembler::fucomi(int i) {
6066   // make sure the instruction is supported (introduced for P6, together with cmov)
6067   guarantee(VM_Version::supports_cmov(), "illegal instruction");
6068   emit_farith(0xDB, 0xE8, i);
6069 }
6070 
6071 void Assembler::fucomip(int i) {
6072   // make sure the instruction is supported (introduced for P6, together with cmov)
6073   guarantee(VM_Version::supports_cmov(), "illegal instruction");
6074   emit_farith(0xDF, 0xE8, i);
6075 }
6076 
6077 void Assembler::fwait() {
6078   emit_int8((unsigned char)0x9B);
6079 }
6080 
6081 void Assembler::fxch(int i) {
6082   emit_farith(0xD9, 0xC8, i);
6083 }
6084 
6085 void Assembler::fyl2x() {
6086   emit_int8((unsigned char)0xD9);
6087   emit_int8((unsigned char)0xF1);
6088 }
6089 
6090 void Assembler::frndint() {
6091   emit_int8((unsigned char)0xD9);
6092   emit_int8((unsigned char)0xFC);
6093 }
6094 
6095 void Assembler::f2xm1() {
6096   emit_int8((unsigned char)0xD9);
6097   emit_int8((unsigned char)0xF0);
6098 }
6099 
6100 void Assembler::fldl2e() {
6101   emit_int8((unsigned char)0xD9);
6102   emit_int8((unsigned char)0xEA);
6103 }
6104 
6105 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
6106 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
6107 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
6108 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
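// For example, the legacy SSE form of pshufb xmm1, xmm2 is emitted as
// 66 0F 38 00 /r: simd_pre[VEX_SIMD_66] supplies the 0x66 prefix and
// simd_opc[VEX_OPCODE_0F_38] supplies the 0x38 escape after the mandatory 0x0F.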
6109 
// Generate a legacy SSE REX prefix plus SIMD opcode bytes from the VEX-style parameters.
6111 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
6112   if (pre > 0) {
6113     emit_int8(simd_pre[pre]);
6114   }
6115   if (rex_w) {
6116     prefixq(adr, xreg);
6117   } else {
6118     prefix(adr, xreg);
6119   }
6120   if (opc > 0) {
6121     emit_int8(0x0F);
6122     int opc2 = simd_opc[opc];
6123     if (opc2 > 0) {
6124       emit_int8(opc2);
6125     }
6126   }
6127 }
6128 
6129 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
6130   if (pre > 0) {
6131     emit_int8(simd_pre[pre]);
6132   }
6133   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
6134                           prefix_and_encode(dst_enc, src_enc);
6135   if (opc > 0) {
6136     emit_int8(0x0F);
6137     int opc2 = simd_opc[opc];
6138     if (opc2 > 0) {
6139       emit_int8(opc2);
6140     }
6141   }
6142   return encode;
6143 }
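// For example, rex_prefix_and_encode(8 /* r8 */, 0 /* rax */, VEX_SIMD_NONE,
// VEX_OPCODE_NONE, /* rex_w */ true) emits REX.WR (0x4C) and returns 0; a
// caller that then emits opcode 0x8B and modrm (0xC0 | 0) produces
// 4C 8B C0, i.e. movq r8, rax.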
6144 
6145 
6146 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) {
6147   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
6148     prefix(VEX_3bytes);
6149 
6150     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
6151     byte1 = (~byte1) & 0xE0;
6152     byte1 |= opc;
6153     emit_int8(byte1);
6154 
6155     int byte2 = ((~nds_enc) & 0xf) << 3;
6156     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
6157     emit_int8(byte2);
6158   } else {
6159     prefix(VEX_2bytes);
6160 
6161     int byte1 = vex_r ? VEX_R : 0;
6162     byte1 = (~byte1) & 0x80;
6163     byte1 |= ((~nds_enc) & 0xf) << 3;
6164     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
6165     emit_int8(byte1);
6166   }
6167 }
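// Worked example: vaddps ymm0, ymm1, ymm2 takes the 2-byte form above, since
// vex_b/vex_x/vex_w are clear and the opcode map is 0F. byte1 =
// ~R:~vvvv:L:pp = 1:1110:1:00 = 0xF4 (R clear, vvvv = ymm1, 256-bit, no SIMD
// prefix), so the prefix is C5 F4 and the full instruction is C5 F4 58 C2.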
6168 
// This emits the 4-byte EVEX prefix: 0x62 followed by three payload bytes.
6170 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
6171                             int nds_enc, VexSimdPrefix pre, VexOpcode opc,
6172                             bool is_extended_context, bool is_merge_context,
                            int vector_len, bool no_mask_reg) {
6174   // EVEX 0x62 prefix
6175   prefix(EVEX_4bytes);
6176   _evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0);
6177 
  // P0: byte 2, laid out as RXBR`00mm;
  // the R/X/B/R` bits are stored inverted (one's complement), hence the not below
6180   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
6181   byte2 = (~byte2) & 0xF0;
  // confine the opc opcode extension to the mm bits in the low two bits,
  // selecting the opcode map of form {0F, 0F_38, 0F_3A}
6184   byte2 |= opc;
6185   emit_int8(byte2);
6186 
6187   // P1: byte 3 as Wvvvv1pp
6188   int byte3 = ((~nds_enc) & 0xf) << 3;
6189   // p[10] is always 1
6190   byte3 |= EVEX_F;
6191   byte3 |= (vex_w & 1) << 7;
  // confine the pre opcode extension to the pp bits in the low two bits,
  // selecting the SIMD prefix of form {66, F3, F2}
6194   byte3 |= pre;
6195   emit_int8(byte3);
6196 
  // P2: byte 4 as z L'L b V' aaa
  // kregs are encoded in the low 3 bits as aaa (k1 is hard-coded for now when a mask is requested)
  int byte4 = (no_mask_reg) ? 0 : 1;
  // EVEX.V' for extending EVEX.vvvv or VIDX
  byte4 |= (evex_v ? 0 : EVEX_V);
  // EVEX.b for broadcast actions
  byte4 |= (is_extended_context ? EVEX_Rb : 0);
  // EVEX.L'L for vector length: 0 is 128-bit, 1 is 256-bit, 2 is 512-bit; 1024-bit is not supported
  byte4 |= ((vector_len) & 0x3) << 5;
  // last is EVEX.z for zero/merge actions
  byte4 |= (is_merge_context ? EVEX_Z : 0);
6207   emit_int8(byte4);
6208 }
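// Worked example: an unmasked vaddps zmm0, zmm1, zmm2 (EVEX.512.0F.W0 58 /r)
// encodes as 62 F1 74 48 58 C2: P0 = 0xF1 (R/X/B/R' all inverted to 1,
// mm = 01 for the 0F map), P1 = 0x74 (W = 0, vvvv = ~1, pp = 00) and
// P2 = 0x48 (L'L = 10 for 512-bit, V' inverted to 1, aaa = 000). Note that the
// helper above hard-codes k1 (aaa = 001) whenever a mask register is requested.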
6209 
6210 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
6211                            VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
  bool vex_r = ((xreg_enc & 8) == 8);
6213   bool vex_b = adr.base_needs_rex();
6214   bool vex_x = adr.index_needs_rex();
6215   _avx_vector_len = vector_len;
6216 
  // if the instruction uses the vector-length extension but AVX512VL is
  // unavailable, revert to AVX for vectors smaller than 512-bit
6218   if (_legacy_mode_vl && _instruction_uses_vl) {
6219     switch (vector_len) {
6220     case AVX_128bit:
6221     case AVX_256bit:
6222       legacy_mode = true;
6223       break;
6224     }
6225   }
6226 
  if ((UseAVX > 2) && (legacy_mode == false)) {
6229     bool evex_r = (xreg_enc >= 16);
6230     bool evex_v = (nds_enc >= 16);
6231     _is_evex_instruction = true;
6232     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
6233   } else {
6234     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
6235   }
6236   _instruction_uses_vl = false;
6237 }
6238 
6239 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
6240                                      bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) {
  bool vex_r = ((dst_enc & 8) == 8);
  bool vex_b = ((src_enc & 8) == 8);
6243   bool vex_x = false;
6244   _avx_vector_len = vector_len;
6245 
  // if the instruction uses the vector-length extension but AVX512VL is
  // unavailable, revert to AVX for vectors smaller than 512-bit
6247   if (_legacy_mode_vl && _instruction_uses_vl) {
6248     switch (vector_len) {
6249     case AVX_128bit:
6250     case AVX_256bit:
6251       legacy_mode = true;
6252       break;
6253     }
6254   }
6255 
  if ((UseAVX > 2) && (legacy_mode == false)) {
6258     bool evex_r = (dst_enc >= 16);
6259     bool evex_v = (nds_enc >= 16);
6260     // can use vex_x as bank extender on rm encoding
6261     vex_x = (src_enc >= 16);
6262     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
6263   } else {
6264     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
6265   }
6266 
6267   _instruction_uses_vl = false;
6268 
6269   // return modrm byte components for operands
6270   return (((dst_enc & 7) << 3) | (src_enc & 7));
6271 }
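// For example, with dst_enc = 9 (xmm9) and src_enc = 2 (xmm2) the value
// returned above is (1 << 3) | 2 = 0x0A; ored with 0xC0 by the caller it forms
// the register-register modrm byte 0xCA, while the dropped high bits travel in
// the VEX/EVEX prefix emitted above.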
6272 
6273 
6274 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
6275                             bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
6276   if (UseAVX > 0) {
6277     int xreg_enc = xreg->encoding();
6278     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
6279     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
6280   } else {
6281     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
6282     rex_prefix(adr, xreg, pre, opc, rex_w);
6283   }
6284 }
6285 
6286 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
6287                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
6288   int dst_enc = dst->encoding();
6289   int src_enc = src->encoding();
6290   if (UseAVX > 0) {
6291     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6292     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
6293   } else {
6294     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
6295     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
6296   }
6297 }
6298 
6299 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
6300                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
6301   int dst_enc = dst->encoding();
6302   int src_enc = src->encoding();
6303   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6304   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
6305 }
6306 
6307 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
6308                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
6309   int dst_enc = dst->encoding();
6310   int src_enc = src->encoding();
6311   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6312   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
6313 }
6314 
6315 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
6316   InstructionMark im(this);
6317   simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
6318   emit_int8(opcode);
6319   emit_operand(dst, src);
6320 }
6321 
6322 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
6323   InstructionMark im(this);
6324   simd_prefix_q(dst, dst, src, pre, no_mask_reg);
6325   emit_int8(opcode);
6326   emit_operand(dst, src);
6327 }
6328 
6329 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
6330   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
6331   emit_int8(opcode);
6332   emit_int8((unsigned char)(0xC0 | encode));
6333 }
6334 
6335 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
6336   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
6337   emit_int8(opcode);
6338   emit_int8((unsigned char)(0xC0 | encode));
6339 }
6340 
6341 // Versions with no second source register (non-destructive source).
6342 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
6343   InstructionMark im(this);
6344   simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
6345   emit_int8(opcode);
6346   emit_operand(dst, src);
6347 }
6348 
6349 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
6350   InstructionMark im(this);
6351   simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
6352   emit_int8(opcode);
6353   emit_operand(dst, src);
6354 }
6355 
6356 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
6357   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
6358   emit_int8(opcode);
6359   emit_int8((unsigned char)(0xC0 | encode));
6360 }
6361 
6362 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
6363   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true);
6364   emit_int8(opcode);
6365   emit_int8((unsigned char)(0xC0 | encode));
6366 }
6367 
// 3-operand AVX instructions
6369 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src,
6370                                VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
6371   InstructionMark im(this);
6372   vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode);
6373   emit_int8(opcode);
6374   emit_operand(dst, src);
6375 }
6376 
6377 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
6378                                  Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
6379   InstructionMark im(this);
6380   vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
6381   emit_int8(opcode);
6382   emit_operand(dst, src);
6383 }
6384 
6385 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
6386                                VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
6387   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, legacy_mode, no_mask_reg);
6388   emit_int8(opcode);
6389   emit_int8((unsigned char)(0xC0 | encode));
6390 }
6391 
6392 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
6393                                  VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
6394   int src_enc = src->encoding();
6395   int dst_enc = dst->encoding();
6396   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6397   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
6398   emit_int8(opcode);
6399   emit_int8((unsigned char)(0xC0 | encode));
6400 }
6401 
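// The cop immediate below selects the cmppd comparison predicate:
// 0 = EQ, 1 = LT, 2 = LE, 3 = UNORD, 4 = NEQ, 5 = NLT, 6 = NLE, 7 = ORD.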
6402 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
6403   assert(VM_Version::supports_avx(), "");
6404   assert(!VM_Version::supports_evex(), "");
6405   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F, /* no_mask_reg */ false);
6406   emit_int8((unsigned char)0xC2);
6407   emit_int8((unsigned char)(0xC0 | encode));
6408   emit_int8((unsigned char)(0xF & cop));
6409 }
6410 
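// Note: opcode 0x4B in the 0F 3A map with the fourth register in imm8[7:4] is
// the VEX "is4" operand form (the vblendvpd-style encoding); src2's register
// number rides in the immediate byte rather than in modrm.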
6411 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
6412   assert(VM_Version::supports_avx(), "");
6413   assert(!VM_Version::supports_evex(), "");
6414   int encode = vex_prefix_and_encode(dst, nds, src1, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* no_mask_reg */ false);
6415   emit_int8((unsigned char)0x4B);
6416   emit_int8((unsigned char)(0xC0 | encode));
6417   int src2_enc = src2->encoding();
  emit_int8((unsigned char)(0xF0 & (src2_enc << 4)));
6419 }
6420 
6421 
6422 #ifndef _LP64
6423 
6424 void Assembler::incl(Register dst) {
6425   // Don't use it directly. Use MacroAssembler::incrementl() instead.
6426   emit_int8(0x40 | dst->encoding());
6427 }
6428 
6429 void Assembler::lea(Register dst, Address src) {
6430   leal(dst, src);
6431 }
6432 
6433 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
6434   InstructionMark im(this);
6435   emit_int8((unsigned char)0xC7);
6436   emit_operand(rax, dst);
6437   emit_data((int)imm32, rspec, 0);
6438 }
6439 
6440 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
6441   InstructionMark im(this);
6442   int encode = prefix_and_encode(dst->encoding());
6443   emit_int8((unsigned char)(0xB8 | encode));
6444   emit_data((int)imm32, rspec, 0);
6445 }
6446 
6447 void Assembler::popa() { // 32bit
6448   emit_int8(0x61);
6449 }
6450 
6451 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
6452   InstructionMark im(this);
6453   emit_int8(0x68);
6454   emit_data(imm32, rspec, 0);
6455 }
6456 
6457 void Assembler::pusha() { // 32bit
6458   emit_int8(0x60);
6459 }
6460 
6461 void Assembler::set_byte_if_not_zero(Register dst) {
6462   emit_int8(0x0F);
6463   emit_int8((unsigned char)0x95);
6464   emit_int8((unsigned char)(0xE0 | dst->encoding()));
6465 }
6466 
6467 void Assembler::shldl(Register dst, Register src) {
6468   emit_int8(0x0F);
6469   emit_int8((unsigned char)0xA5);
6470   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6471 }
6472 
6473 // 0F A4 / r ib
6474 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
6475   emit_int8(0x0F);
6476   emit_int8((unsigned char)0xA4);
6477   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6478   emit_int8(imm8);
6479 }
6480 
6481 void Assembler::shrdl(Register dst, Register src) {
6482   emit_int8(0x0F);
6483   emit_int8((unsigned char)0xAD);
6484   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6485 }
6486 
6487 #else // LP64
6488 
6489 void Assembler::set_byte_if_not_zero(Register dst) {
6490   int enc = prefix_and_encode(dst->encoding(), true);
6491   emit_int8(0x0F);
6492   emit_int8((unsigned char)0x95);
6493   emit_int8((unsigned char)(0xE0 | enc));
6494 }
6495 
// 64-bit only pieces of the assembler.
// The following should only be used by 64-bit instructions that can use
// rip-relative addressing; it cannot be used by instructions that want an
// immediate value.
6499 
6500 bool Assembler::reachable(AddressLiteral adr) {
6501   int64_t disp;
  // A relocType of none will force a 64-bit literal into the code stream. It is
  // likely a placeholder for something that will be patched later, and we need
  // to be certain it will always be reachable.
6505   if (adr.reloc() == relocInfo::none) {
6506     return false;
6507   }
6508   if (adr.reloc() == relocInfo::internal_word_type) {
6509     // This should be rip relative and easily reachable.
6510     return true;
6511   }
6512   if (adr.reloc() == relocInfo::virtual_call_type ||
6513       adr.reloc() == relocInfo::opt_virtual_call_type ||
6514       adr.reloc() == relocInfo::static_call_type ||
6515       adr.reloc() == relocInfo::static_stub_type ) {
6516     // This should be rip relative within the code cache and easily
6517     // reachable until we get huge code caches. (At which point
6518     // ic code is going to have issues).
6519     return true;
6520   }
6521   if (adr.reloc() != relocInfo::external_word_type &&
6522       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
6523       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
6524       adr.reloc() != relocInfo::runtime_call_type ) {
6525     return false;
6526   }
6527 
6528   // Stress the correction code
6529   if (ForceUnreachable) {
    // Must be a runtime_call reloc; see if it is in the code cache.
    // Flipping code-cache entries to be unreachable causes issues with
    // things like inline caches, where the additional instructions are
    // not handled.
6534     if (CodeCache::find_blob(adr._target) == NULL) {
6535       return false;
6536     }
6537   }
  // For external_word_type/runtime_call_type: if the target is reachable both
  // from where we are now (possibly a temp buffer) and from anywhere we might
  // end up in the codeCache, then we are always reachable. This would have to
  // change to be more pessimistic if we ever save/restore shared code.
6543   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
6544   if (!is_simm32(disp)) return false;
6545   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
6546   if (!is_simm32(disp)) return false;
6547 
6548   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
6549 
6550   // Because rip relative is a disp + address_of_next_instruction and we
6551   // don't know the value of address_of_next_instruction we apply a fudge factor
6552   // to make sure we will be ok no matter the size of the instruction we get placed into.
6553   // We don't have to fudge the checks above here because they are already worst case.
6554 
  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, 4-byte literal
6556   // + 4 because better safe than sorry.
6557   const int fudge = 12 + 4;
6558   if (disp < 0) {
6559     disp -= fudge;
6560   } else {
6561     disp += fudge;
6562   }
6563   return is_simm32(disp);
6564 }
6565 
// Returns true if the polling page is not reachable from the code cache using
// rip-relative addressing.
6568 bool Assembler::is_polling_page_far() {
6569   intptr_t addr = (intptr_t)os::get_polling_page();
6570   return ForceUnreachable ||
6571          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
6572          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
6573 }
6574 
6575 void Assembler::emit_data64(jlong data,
6576                             relocInfo::relocType rtype,
6577                             int format) {
6578   if (rtype == relocInfo::none) {
6579     emit_int64(data);
6580   } else {
6581     emit_data64(data, Relocation::spec_simple(rtype), format);
6582   }
6583 }
6584 
6585 void Assembler::emit_data64(jlong data,
6586                             RelocationHolder const& rspec,
6587                             int format) {
6588   assert(imm_operand == 0, "default format must be immediate in this file");
6589   assert(imm_operand == format, "must be immediate");
6590   assert(inst_mark() != NULL, "must be inside InstructionMark");
6591   // Do not use AbstractAssembler::relocate, which is not intended for
6592   // embedded words.  Instead, relocate to the enclosing instruction.
6593   code_section()->relocate(inst_mark(), rspec, format);
6594 #ifdef ASSERT
6595   check_relocation(rspec, format);
6596 #endif
6597   emit_int64(data);
6598 }
6599 
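// REX prefixes have the form 0100WRXB: W selects 64-bit operand size, R
// extends the modrm reg field, X the SIB index field and B the modrm rm/base
// field. That is why the helpers below subtract 8 from any register encoding
// >= 8 once the corresponding extension bit is folded into the prefix.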
6600 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
6601   if (reg_enc >= 8) {
6602     prefix(REX_B);
6603     reg_enc -= 8;
6604   } else if (byteinst && reg_enc >= 4) {
6605     prefix(REX);
6606   }
6607   return reg_enc;
6608 }
6609 
6610 int Assembler::prefixq_and_encode(int reg_enc) {
6611   if (reg_enc < 8) {
6612     prefix(REX_W);
6613   } else {
6614     prefix(REX_WB);
6615     reg_enc -= 8;
6616   }
6617   return reg_enc;
6618 }
6619 
6620 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
6621   if (dst_enc < 8) {
6622     if (src_enc >= 8) {
6623       prefix(REX_B);
6624       src_enc -= 8;
6625     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
6626       prefix(REX);
6627     }
6628   } else {
6629     if (src_enc < 8) {
6630       prefix(REX_R);
6631     } else {
6632       prefix(REX_RB);
6633       src_enc -= 8;
6634     }
6635     dst_enc -= 8;
6636   }
6637   return dst_enc << 3 | src_enc;
6638 }
6639 
6640 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
6641   if (dst_enc < 8) {
6642     if (src_enc < 8) {
6643       prefix(REX_W);
6644     } else {
6645       prefix(REX_WB);
6646       src_enc -= 8;
6647     }
6648   } else {
6649     if (src_enc < 8) {
6650       prefix(REX_WR);
6651     } else {
6652       prefix(REX_WRB);
6653       src_enc -= 8;
6654     }
6655     dst_enc -= 8;
6656   }
6657   return dst_enc << 3 | src_enc;
6658 }
6659 
6660 void Assembler::prefix(Register reg) {
6661   if (reg->encoding() >= 8) {
6662     prefix(REX_B);
6663   }
6664 }
6665 
6666 void Assembler::prefix(Register dst, Register src, Prefix p) {
6667   if (src->encoding() >= 8) {
6668     p = (Prefix)(p | REX_B);
6669   }
6670   if (dst->encoding() >= 8) {
6671     p = (Prefix)( p | REX_R);
6672   }
6673   if (p != Prefix_EMPTY) {
6674     // do not generate an empty prefix
6675     prefix(p);
6676   }
6677 }
6678 
6679 void Assembler::prefix(Register dst, Address adr, Prefix p) {
6680   if (adr.base_needs_rex()) {
6681     if (adr.index_needs_rex()) {
6682       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
6683     } else {
6684       prefix(REX_B);
6685     }
6686   } else {
6687     if (adr.index_needs_rex()) {
6688       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
6689     }
6690   }
6691   if (dst->encoding() >= 8) {
6692     p = (Prefix)(p | REX_R);
6693   }
6694   if (p != Prefix_EMPTY) {
6695     // do not generate an empty prefix
6696     prefix(p);
6697   }
6698 }
6699 
6700 void Assembler::prefix(Address adr) {
6701   if (adr.base_needs_rex()) {
6702     if (adr.index_needs_rex()) {
6703       prefix(REX_XB);
6704     } else {
6705       prefix(REX_B);
6706     }
6707   } else {
6708     if (adr.index_needs_rex()) {
6709       prefix(REX_X);
6710     }
6711   }
6712 }
6713 
6714 void Assembler::prefixq(Address adr) {
6715   if (adr.base_needs_rex()) {
6716     if (adr.index_needs_rex()) {
6717       prefix(REX_WXB);
6718     } else {
6719       prefix(REX_WB);
6720     }
6721   } else {
6722     if (adr.index_needs_rex()) {
6723       prefix(REX_WX);
6724     } else {
6725       prefix(REX_W);
6726     }
6727   }
6728 }
6729 
6730 
6731 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
6732   if (reg->encoding() < 8) {
6733     if (adr.base_needs_rex()) {
6734       if (adr.index_needs_rex()) {
6735         prefix(REX_XB);
6736       } else {
6737         prefix(REX_B);
6738       }
6739     } else {
6740       if (adr.index_needs_rex()) {
6741         prefix(REX_X);
6742       } else if (byteinst && reg->encoding() >= 4 ) {
6743         prefix(REX);
6744       }
6745     }
6746   } else {
6747     if (adr.base_needs_rex()) {
6748       if (adr.index_needs_rex()) {
6749         prefix(REX_RXB);
6750       } else {
6751         prefix(REX_RB);
6752       }
6753     } else {
6754       if (adr.index_needs_rex()) {
6755         prefix(REX_RX);
6756       } else {
6757         prefix(REX_R);
6758       }
6759     }
6760   }
6761 }
6762 
6763 void Assembler::prefixq(Address adr, Register src) {
6764   if (src->encoding() < 8) {
6765     if (adr.base_needs_rex()) {
6766       if (adr.index_needs_rex()) {
6767         prefix(REX_WXB);
6768       } else {
6769         prefix(REX_WB);
6770       }
6771     } else {
6772       if (adr.index_needs_rex()) {
6773         prefix(REX_WX);
6774       } else {
6775         prefix(REX_W);
6776       }
6777     }
6778   } else {
6779     if (adr.base_needs_rex()) {
6780       if (adr.index_needs_rex()) {
6781         prefix(REX_WRXB);
6782       } else {
6783         prefix(REX_WRB);
6784       }
6785     } else {
6786       if (adr.index_needs_rex()) {
6787         prefix(REX_WRX);
6788       } else {
6789         prefix(REX_WR);
6790       }
6791     }
6792   }
6793 }
6794 
6795 void Assembler::prefix(Address adr, XMMRegister reg) {
6796   if (reg->encoding() < 8) {
6797     if (adr.base_needs_rex()) {
6798       if (adr.index_needs_rex()) {
6799         prefix(REX_XB);
6800       } else {
6801         prefix(REX_B);
6802       }
6803     } else {
6804       if (adr.index_needs_rex()) {
6805         prefix(REX_X);
6806       }
6807     }
6808   } else {
6809     if (adr.base_needs_rex()) {
6810       if (adr.index_needs_rex()) {
6811         prefix(REX_RXB);
6812       } else {
6813         prefix(REX_RB);
6814       }
6815     } else {
6816       if (adr.index_needs_rex()) {
6817         prefix(REX_RX);
6818       } else {
6819         prefix(REX_R);
6820       }
6821     }
6822   }
6823 }
6824 
6825 void Assembler::prefixq(Address adr, XMMRegister src) {
6826   if (src->encoding() < 8) {
6827     if (adr.base_needs_rex()) {
6828       if (adr.index_needs_rex()) {
6829         prefix(REX_WXB);
6830       } else {
6831         prefix(REX_WB);
6832       }
6833     } else {
6834       if (adr.index_needs_rex()) {
6835         prefix(REX_WX);
6836       } else {
6837         prefix(REX_W);
6838       }
6839     }
6840   } else {
6841     if (adr.base_needs_rex()) {
6842       if (adr.index_needs_rex()) {
6843         prefix(REX_WRXB);
6844       } else {
6845         prefix(REX_WRB);
6846       }
6847     } else {
6848       if (adr.index_needs_rex()) {
6849         prefix(REX_WRX);
6850       } else {
6851         prefix(REX_WR);
6852       }
6853     }
6854   }
6855 }
6856 
6857 void Assembler::adcq(Register dst, int32_t imm32) {
6858   (void) prefixq_and_encode(dst->encoding());
6859   emit_arith(0x81, 0xD0, dst, imm32);
6860 }
6861 
6862 void Assembler::adcq(Register dst, Address src) {
6863   InstructionMark im(this);
6864   prefixq(src, dst);
6865   emit_int8(0x13);
6866   emit_operand(dst, src);
6867 }
6868 
6869 void Assembler::adcq(Register dst, Register src) {
6870   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6871   emit_arith(0x13, 0xC0, dst, src);
6872 }
6873 
6874 void Assembler::addq(Address dst, int32_t imm32) {
6875   InstructionMark im(this);
6876   prefixq(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
6878 }
6879 
6880 void Assembler::addq(Address dst, Register src) {
6881   InstructionMark im(this);
6882   prefixq(dst, src);
6883   emit_int8(0x01);
6884   emit_operand(src, dst);
6885 }
6886 
6887 void Assembler::addq(Register dst, int32_t imm32) {
6888   (void) prefixq_and_encode(dst->encoding());
6889   emit_arith(0x81, 0xC0, dst, imm32);
6890 }
6891 
6892 void Assembler::addq(Register dst, Address src) {
6893   InstructionMark im(this);
6894   prefixq(src, dst);
6895   emit_int8(0x03);
6896   emit_operand(dst, src);
6897 }
6898 
6899 void Assembler::addq(Register dst, Register src) {
6900   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6901   emit_arith(0x03, 0xC0, dst, src);
6902 }
6903 
6904 void Assembler::adcxq(Register dst, Register src) {
6905   //assert(VM_Version::supports_adx(), "adx instructions not supported");
6906   emit_int8((unsigned char)0x66);
6907   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6908   emit_int8(0x0F);
6909   emit_int8(0x38);
6910   emit_int8((unsigned char)0xF6);
6911   emit_int8((unsigned char)(0xC0 | encode));
6912 }
6913 
6914 void Assembler::adoxq(Register dst, Register src) {
6915   //assert(VM_Version::supports_adx(), "adx instructions not supported");
6916   emit_int8((unsigned char)0xF3);
6917   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6918   emit_int8(0x0F);
6919   emit_int8(0x38);
6920   emit_int8((unsigned char)0xF6);
6921   emit_int8((unsigned char)(0xC0 | encode));
6922 }
6923 
6924 void Assembler::andq(Address dst, int32_t imm32) {
6925   InstructionMark im(this);
6926   prefixq(dst);
6927   emit_int8((unsigned char)0x81);
6928   emit_operand(rsp, dst, 4);
6929   emit_int32(imm32);
6930 }
6931 
6932 void Assembler::andq(Register dst, int32_t imm32) {
6933   (void) prefixq_and_encode(dst->encoding());
6934   emit_arith(0x81, 0xE0, dst, imm32);
6935 }
6936 
6937 void Assembler::andq(Register dst, Address src) {
6938   InstructionMark im(this);
6939   prefixq(src, dst);
6940   emit_int8(0x23);
6941   emit_operand(dst, src);
6942 }
6943 
6944 void Assembler::andq(Register dst, Register src) {
6945   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6946   emit_arith(0x23, 0xC0, dst, src);
6947 }
6948 
6949 void Assembler::andnq(Register dst, Register src1, Register src2) {
6950   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6951   int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2);
6952   emit_int8((unsigned char)0xF2);
6953   emit_int8((unsigned char)(0xC0 | encode));
6954 }
6955 
6956 void Assembler::andnq(Register dst, Register src1, Address src2) {
6957   InstructionMark im(this);
6958   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6959   vex_prefix_0F38_q_legacy(dst, src1, src2);
6960   emit_int8((unsigned char)0xF2);
6961   emit_operand(dst, src2);
6962 }
6963 
6964 void Assembler::bsfq(Register dst, Register src) {
6965   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6966   emit_int8(0x0F);
6967   emit_int8((unsigned char)0xBC);
6968   emit_int8((unsigned char)(0xC0 | encode));
6969 }
6970 
6971 void Assembler::bsrq(Register dst, Register src) {
6972   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6973   emit_int8(0x0F);
6974   emit_int8((unsigned char)0xBD);
6975   emit_int8((unsigned char)(0xC0 | encode));
6976 }
6977 
6978 void Assembler::bswapq(Register reg) {
6979   int encode = prefixq_and_encode(reg->encoding());
6980   emit_int8(0x0F);
6981   emit_int8((unsigned char)(0xC8 | encode));
6982 }
6983 
6984 void Assembler::blsiq(Register dst, Register src) {
6985   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6986   int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src);
6987   emit_int8((unsigned char)0xF3);
6988   emit_int8((unsigned char)(0xC0 | encode));
6989 }
6990 
6991 void Assembler::blsiq(Register dst, Address src) {
6992   InstructionMark im(this);
6993   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6994   vex_prefix_0F38_q_legacy(rbx, dst, src);
6995   emit_int8((unsigned char)0xF3);
6996   emit_operand(rbx, src);
6997 }
6998 
6999 void Assembler::blsmskq(Register dst, Register src) {
7000   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7001   int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src);
7002   emit_int8((unsigned char)0xF3);
7003   emit_int8((unsigned char)(0xC0 | encode));
7004 }
7005 
7006 void Assembler::blsmskq(Register dst, Address src) {
7007   InstructionMark im(this);
7008   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7009   vex_prefix_0F38_q_legacy(rdx, dst, src);
7010   emit_int8((unsigned char)0xF3);
7011   emit_operand(rdx, src);
7012 }
7013 
7014 void Assembler::blsrq(Register dst, Register src) {
7015   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7016   int encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src);
7017   emit_int8((unsigned char)0xF3);
7018   emit_int8((unsigned char)(0xC0 | encode));
7019 }
7020 
7021 void Assembler::blsrq(Register dst, Address src) {
7022   InstructionMark im(this);
7023   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
7024   vex_prefix_0F38_q_legacy(rcx, dst, src);
7025   emit_int8((unsigned char)0xF3);
7026   emit_operand(rcx, src);
7027 }
7028 
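// cdqq emits CQO (REX.W 99), sign-extending rax into rdx:rax, typically ahead
// of idivq.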
7029 void Assembler::cdqq() {
7030   prefix(REX_W);
7031   emit_int8((unsigned char)0x99);
7032 }
7033 
7034 void Assembler::clflush(Address adr) {
7035   prefix(adr);
7036   emit_int8(0x0F);
7037   emit_int8((unsigned char)0xAE);
7038   emit_operand(rdi, adr);
7039 }
7040 
7041 void Assembler::cmovq(Condition cc, Register dst, Register src) {
7042   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7043   emit_int8(0x0F);
7044   emit_int8(0x40 | cc);
7045   emit_int8((unsigned char)(0xC0 | encode));
7046 }
7047 
7048 void Assembler::cmovq(Condition cc, Register dst, Address src) {
7049   InstructionMark im(this);
7050   prefixq(src, dst);
7051   emit_int8(0x0F);
7052   emit_int8(0x40 | cc);
7053   emit_operand(dst, src);
7054 }
7055 
7056 void Assembler::cmpq(Address dst, int32_t imm32) {
7057   InstructionMark im(this);
7058   prefixq(dst);
7059   emit_int8((unsigned char)0x81);
7060   emit_operand(rdi, dst, 4);
7061   emit_int32(imm32);
7062 }
7063 
7064 void Assembler::cmpq(Register dst, int32_t imm32) {
7065   (void) prefixq_and_encode(dst->encoding());
7066   emit_arith(0x81, 0xF8, dst, imm32);
7067 }
7068 
void Assembler::cmpq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x39); // CMP r/m64, r64; 0x3B would encode the operands in the wrong order
  emit_operand(src, dst);
}
7075 
7076 void Assembler::cmpq(Register dst, Register src) {
7077   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7078   emit_arith(0x3B, 0xC0, dst, src);
7079 }
7080 
7081 void Assembler::cmpq(Register dst, Address  src) {
7082   InstructionMark im(this);
7083   prefixq(src, dst);
7084   emit_int8(0x3B);
7085   emit_operand(dst, src);
7086 }
7087 
7088 void Assembler::cmpxchgq(Register reg, Address adr) {
7089   InstructionMark im(this);
7090   prefixq(adr, reg);
7091   emit_int8(0x0F);
7092   emit_int8((unsigned char)0xB1);
7093   emit_operand(reg, adr);
7094 }
7095 
7096 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
7097   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7098   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
7099   emit_int8(0x2A);
7100   emit_int8((unsigned char)(0xC0 | encode));
7101 }
7102 
7103 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
7104   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7105   if (VM_Version::supports_evex()) {
7106     _tuple_type = EVEX_T1S;
7107     _input_size_in_bits = EVEX_32bit;
7108   }
7109   InstructionMark im(this);
7110   simd_prefix_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
7111   emit_int8(0x2A);
7112   emit_operand(dst, src);
7113 }
7114 
7115 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
7116   NOT_LP64(assert(VM_Version::supports_sse(), ""));
7117   if (VM_Version::supports_evex()) {
7118     _tuple_type = EVEX_T1S;
7119     _input_size_in_bits = EVEX_32bit;
7120   }
7121   InstructionMark im(this);
7122   simd_prefix_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
7123   emit_int8(0x2A);
7124   emit_operand(dst, src);
7125 }
7126 
7127 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
7128   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7129   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
7130   emit_int8(0x2C);
7131   emit_int8((unsigned char)(0xC0 | encode));
7132 }
7133 
7134 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
7135   NOT_LP64(assert(VM_Version::supports_sse(), ""));
7136   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
7137   emit_int8(0x2C);
7138   emit_int8((unsigned char)(0xC0 | encode));
7139 }
7140 
7141 void Assembler::decl(Register dst) {
7142   // Don't use it directly. Use MacroAssembler::decrementl() instead.
7143   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
7144   int encode = prefix_and_encode(dst->encoding());
7145   emit_int8((unsigned char)0xFF);
7146   emit_int8((unsigned char)(0xC8 | encode));
7147 }
7148 
7149 void Assembler::decq(Register dst) {
7150   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
7152   int encode = prefixq_and_encode(dst->encoding());
7153   emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
7155 }
7156 
7157 void Assembler::decq(Address dst) {
7158   // Don't use it directly. Use MacroAssembler::decrementq() instead.
7159   InstructionMark im(this);
7160   prefixq(dst);
7161   emit_int8((unsigned char)0xFF);
7162   emit_operand(rcx, dst);
7163 }
7164 
7165 void Assembler::fxrstor(Address src) {
7166   prefixq(src);
7167   emit_int8(0x0F);
7168   emit_int8((unsigned char)0xAE);
7169   emit_operand(as_Register(1), src);
7170 }
7171 
7172 void Assembler::xrstor(Address src) {
7173   prefixq(src);
7174   emit_int8(0x0F);
7175   emit_int8((unsigned char)0xAE);
7176   emit_operand(as_Register(5), src);
7177 }
7178 
7179 void Assembler::fxsave(Address dst) {
7180   prefixq(dst);
7181   emit_int8(0x0F);
7182   emit_int8((unsigned char)0xAE);
7183   emit_operand(as_Register(0), dst);
7184 }
7185 
7186 void Assembler::xsave(Address dst) {
7187   prefixq(dst);
7188   emit_int8(0x0F);
7189   emit_int8((unsigned char)0xAE);
7190   emit_operand(as_Register(4), dst);
7191 }
7192 
7193 void Assembler::idivq(Register src) {
7194   int encode = prefixq_and_encode(src->encoding());
7195   emit_int8((unsigned char)0xF7);
7196   emit_int8((unsigned char)(0xF8 | encode));
7197 }
7198 
7199 void Assembler::imulq(Register dst, Register src) {
7200   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7201   emit_int8(0x0F);
7202   emit_int8((unsigned char)0xAF);
7203   emit_int8((unsigned char)(0xC0 | encode));
7204 }
7205 
7206 void Assembler::imulq(Register dst, Register src, int value) {
7207   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7208   if (is8bit(value)) {
7209     emit_int8(0x6B);
7210     emit_int8((unsigned char)(0xC0 | encode));
7211     emit_int8(value & 0xFF);
7212   } else {
7213     emit_int8(0x69);
7214     emit_int8((unsigned char)(0xC0 | encode));
7215     emit_int32(value);
7216   }
7217 }
7218 
7219 void Assembler::imulq(Register dst, Address src) {
7220   InstructionMark im(this);
7221   prefixq(src, dst);
7222   emit_int8(0x0F);
  emit_int8((unsigned char)0xAF);
7224   emit_operand(dst, src);
7225 }
7226 
7227 void Assembler::incl(Register dst) {
7228   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
7230   int encode = prefix_and_encode(dst->encoding());
7231   emit_int8((unsigned char)0xFF);
7232   emit_int8((unsigned char)(0xC0 | encode));
7233 }
7234 
7235 void Assembler::incq(Register dst) {
7236   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
7238   int encode = prefixq_and_encode(dst->encoding());
7239   emit_int8((unsigned char)0xFF);
7240   emit_int8((unsigned char)(0xC0 | encode));
7241 }
7242 
7243 void Assembler::incq(Address dst) {
7244   // Don't use it directly. Use MacroAssembler::incrementq() instead.
7245   InstructionMark im(this);
7246   prefixq(dst);
7247   emit_int8((unsigned char)0xFF);
7248   emit_operand(rax, dst);
7249 }
7250 
7251 void Assembler::lea(Register dst, Address src) {
7252   leaq(dst, src);
7253 }
7254 
7255 void Assembler::leaq(Register dst, Address src) {
7256   InstructionMark im(this);
7257   prefixq(src, dst);
7258   emit_int8((unsigned char)0x8D);
7259   emit_operand(dst, src);
7260 }
7261 
7262 void Assembler::mov64(Register dst, int64_t imm64) {
7263   InstructionMark im(this);
7264   int encode = prefixq_and_encode(dst->encoding());
7265   emit_int8((unsigned char)(0xB8 | encode));
7266   emit_int64(imm64);
7267 }
7268 
7269 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
7270   InstructionMark im(this);
7271   int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
7273   emit_data64(imm64, rspec);
7274 }
7275 
7276 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
7277   InstructionMark im(this);
7278   int encode = prefix_and_encode(dst->encoding());
7279   emit_int8((unsigned char)(0xB8 | encode));
7280   emit_data((int)imm32, rspec, narrow_oop_operand);
7281 }
7282 
7283 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
7284   InstructionMark im(this);
7285   prefix(dst);
7286   emit_int8((unsigned char)0xC7);
7287   emit_operand(rax, dst, 4);
7288   emit_data((int)imm32, rspec, narrow_oop_operand);
7289 }
7290 
7291 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
7292   InstructionMark im(this);
7293   int encode = prefix_and_encode(src1->encoding());
7294   emit_int8((unsigned char)0x81);
7295   emit_int8((unsigned char)(0xF8 | encode));
7296   emit_data((int)imm32, rspec, narrow_oop_operand);
7297 }
7298 
7299 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
7300   InstructionMark im(this);
7301   prefix(src1);
7302   emit_int8((unsigned char)0x81);
7303   emit_operand(rax, src1, 4);
7304   emit_data((int)imm32, rspec, narrow_oop_operand);
7305 }
7306 
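// lzcnt reuses the bsr opcode with an F3 prefix; on CPUs without LZCNT the
// prefix is ignored and the bytes execute as plain bsr (with different
// semantics), which is what the assert below guards against.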
7307 void Assembler::lzcntq(Register dst, Register src) {
7308   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
7309   emit_int8((unsigned char)0xF3);
7310   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7311   emit_int8(0x0F);
7312   emit_int8((unsigned char)0xBD);
7313   emit_int8((unsigned char)(0xC0 | encode));
7314 }
7315 
7316 void Assembler::movdq(XMMRegister dst, Register src) {
7317   // table D-1 says MMX/SSE2
7318   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7319   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
7320   emit_int8(0x6E);
7321   emit_int8((unsigned char)(0xC0 | encode));
7322 }
7323 
7324 void Assembler::movdq(Register dst, XMMRegister src) {
7325   // table D-1 says MMX/SSE2
7326   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7327   // swap src/dst to get correct prefix
7328   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
7329   emit_int8(0x7E);
7330   emit_int8((unsigned char)(0xC0 | encode));
7331 }
7332 
7333 void Assembler::movq(Register dst, Register src) {
7334   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7335   emit_int8((unsigned char)0x8B);
7336   emit_int8((unsigned char)(0xC0 | encode));
7337 }
7338 
7339 void Assembler::movq(Register dst, Address src) {
7340   InstructionMark im(this);
7341   prefixq(src, dst);
7342   emit_int8((unsigned char)0x8B);
7343   emit_operand(dst, src);
7344 }
7345 
7346 void Assembler::movq(Address dst, Register src) {
7347   InstructionMark im(this);
7348   prefixq(dst, src);
7349   emit_int8((unsigned char)0x89);
7350   emit_operand(src, dst);
7351 }
7352 
7353 void Assembler::movsbq(Register dst, Address src) {
7354   InstructionMark im(this);
7355   prefixq(src, dst);
7356   emit_int8(0x0F);
7357   emit_int8((unsigned char)0xBE);
7358   emit_operand(dst, src);
7359 }
7360 
7361 void Assembler::movsbq(Register dst, Register src) {
7362   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7363   emit_int8(0x0F);
7364   emit_int8((unsigned char)0xBE);
7365   emit_int8((unsigned char)(0xC0 | encode));
7366 }
7367 
7368 void Assembler::movslq(Register dst, int32_t imm32) {
7369   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
7370   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use it until tested at runtime...
7372   ShouldNotReachHere();
7373   InstructionMark im(this);
7374   int encode = prefixq_and_encode(dst->encoding());
7375   emit_int8((unsigned char)(0xC7 | encode));
7376   emit_int32(imm32);
7377 }
7378 
7379 void Assembler::movslq(Address dst, int32_t imm32) {
7380   assert(is_simm32(imm32), "lost bits");
7381   InstructionMark im(this);
7382   prefixq(dst);
7383   emit_int8((unsigned char)0xC7);
7384   emit_operand(rax, dst, 4);
7385   emit_int32(imm32);
7386 }
7387 
7388 void Assembler::movslq(Register dst, Address src) {
7389   InstructionMark im(this);
7390   prefixq(src, dst);
7391   emit_int8(0x63);
7392   emit_operand(dst, src);
7393 }
7394 
7395 void Assembler::movslq(Register dst, Register src) {
7396   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7397   emit_int8(0x63);
7398   emit_int8((unsigned char)(0xC0 | encode));
7399 }
7400 
7401 void Assembler::movswq(Register dst, Address src) {
7402   InstructionMark im(this);
7403   prefixq(src, dst);
7404   emit_int8(0x0F);
7405   emit_int8((unsigned char)0xBF);
7406   emit_operand(dst, src);
7407 }
7408 
7409 void Assembler::movswq(Register dst, Register src) {
7410   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7411   emit_int8((unsigned char)0x0F);
7412   emit_int8((unsigned char)0xBF);
7413   emit_int8((unsigned char)(0xC0 | encode));
7414 }
7415 
7416 void Assembler::movzbq(Register dst, Address src) {
7417   InstructionMark im(this);
7418   prefixq(src, dst);
7419   emit_int8((unsigned char)0x0F);
7420   emit_int8((unsigned char)0xB6);
7421   emit_operand(dst, src);
7422 }
7423 
7424 void Assembler::movzbq(Register dst, Register src) {
7425   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7426   emit_int8(0x0F);
7427   emit_int8((unsigned char)0xB6);
  emit_int8((unsigned char)(0xC0 | encode));
7429 }
7430 
7431 void Assembler::movzwq(Register dst, Address src) {
7432   InstructionMark im(this);
7433   prefixq(src, dst);
7434   emit_int8((unsigned char)0x0F);
7435   emit_int8((unsigned char)0xB7);
7436   emit_operand(dst, src);
7437 }
7438 
7439 void Assembler::movzwq(Register dst, Register src) {
7440   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7441   emit_int8((unsigned char)0x0F);
7442   emit_int8((unsigned char)0xB7);
7443   emit_int8((unsigned char)(0xC0 | encode));
7444 }
7445 
7446 void Assembler::mulq(Address src) {
7447   InstructionMark im(this);
7448   prefixq(src);
7449   emit_int8((unsigned char)0xF7);
7450   emit_operand(rsp, src);
7451 }
7452 
7453 void Assembler::mulq(Register src) {
7454   int encode = prefixq_and_encode(src->encoding());
7455   emit_int8((unsigned char)0xF7);
7456   emit_int8((unsigned char)(0xE0 | encode));
7457 }
7458 
7459 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
7460   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
7461   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38,
7462                                     /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
7463   emit_int8((unsigned char)0xF6);
7464   emit_int8((unsigned char)(0xC0 | encode));
7465 }
7466 
7467 void Assembler::negq(Register dst) {
7468   int encode = prefixq_and_encode(dst->encoding());
7469   emit_int8((unsigned char)0xF7);
7470   emit_int8((unsigned char)(0xD8 | encode));
7471 }
7472 
7473 void Assembler::notq(Register dst) {
7474   int encode = prefixq_and_encode(dst->encoding());
7475   emit_int8((unsigned char)0xF7);
7476   emit_int8((unsigned char)(0xD0 | encode));
7477 }
7478 
7479 void Assembler::orq(Address dst, int32_t imm32) {
7480   InstructionMark im(this);
7481   prefixq(dst);
7482   emit_int8((unsigned char)0x81);
7483   emit_operand(rcx, dst, 4);
7484   emit_int32(imm32);
7485 }
7486 
7487 void Assembler::orq(Register dst, int32_t imm32) {
7488   (void) prefixq_and_encode(dst->encoding());
7489   emit_arith(0x81, 0xC8, dst, imm32);
7490 }
7491 
7492 void Assembler::orq(Register dst, Address src) {
7493   InstructionMark im(this);
7494   prefixq(src, dst);
7495   emit_int8(0x0B);
7496   emit_operand(dst, src);
7497 }
7498 
7499 void Assembler::orq(Register dst, Register src) {
7500   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7501   emit_arith(0x0B, 0xC0, dst, src);
7502 }
7503 
7504 void Assembler::popa() { // 64bit
7505   movq(r15, Address(rsp, 0));
7506   movq(r14, Address(rsp, wordSize));
7507   movq(r13, Address(rsp, 2 * wordSize));
7508   movq(r12, Address(rsp, 3 * wordSize));
7509   movq(r11, Address(rsp, 4 * wordSize));
7510   movq(r10, Address(rsp, 5 * wordSize));
7511   movq(r9,  Address(rsp, 6 * wordSize));
7512   movq(r8,  Address(rsp, 7 * wordSize));
7513   movq(rdi, Address(rsp, 8 * wordSize));
7514   movq(rsi, Address(rsp, 9 * wordSize));
7515   movq(rbp, Address(rsp, 10 * wordSize));
7516   // skip rsp
7517   movq(rbx, Address(rsp, 12 * wordSize));
7518   movq(rdx, Address(rsp, 13 * wordSize));
7519   movq(rcx, Address(rsp, 14 * wordSize));
7520   movq(rax, Address(rsp, 15 * wordSize));
7521 
7522   addq(rsp, 16 * wordSize);
7523 }
7524 
7525 void Assembler::popcntq(Register dst, Address src) {
7526   assert(VM_Version::supports_popcnt(), "must support");
7527   InstructionMark im(this);
7528   emit_int8((unsigned char)0xF3);
7529   prefixq(src, dst);
7530   emit_int8((unsigned char)0x0F);
7531   emit_int8((unsigned char)0xB8);
7532   emit_operand(dst, src);
7533 }
7534 
7535 void Assembler::popcntq(Register dst, Register src) {
7536   assert(VM_Version::supports_popcnt(), "must support");
7537   emit_int8((unsigned char)0xF3);
7538   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7539   emit_int8((unsigned char)0x0F);
7540   emit_int8((unsigned char)0xB8);
7541   emit_int8((unsigned char)(0xC0 | encode));
7542 }
7543 
7544 void Assembler::popq(Address dst) {
7545   InstructionMark im(this);
7546   prefixq(dst);
7547   emit_int8((unsigned char)0x8F);
7548   emit_operand(rax, dst);
7549 }
7550 
7551 void Assembler::pusha() { // 64bit
  // we have to store the original rsp.  The ABI says that the 128 bytes
  // below rsp (the red zone) are local scratch.
7554   movq(Address(rsp, -5 * wordSize), rsp);
7555 
7556   subq(rsp, 16 * wordSize);
7557 
7558   movq(Address(rsp, 15 * wordSize), rax);
7559   movq(Address(rsp, 14 * wordSize), rcx);
7560   movq(Address(rsp, 13 * wordSize), rdx);
7561   movq(Address(rsp, 12 * wordSize), rbx);
7562   // skip rsp
7563   movq(Address(rsp, 10 * wordSize), rbp);
7564   movq(Address(rsp, 9 * wordSize), rsi);
7565   movq(Address(rsp, 8 * wordSize), rdi);
7566   movq(Address(rsp, 7 * wordSize), r8);
7567   movq(Address(rsp, 6 * wordSize), r9);
7568   movq(Address(rsp, 5 * wordSize), r10);
7569   movq(Address(rsp, 4 * wordSize), r11);
7570   movq(Address(rsp, 3 * wordSize), r12);
7571   movq(Address(rsp, 2 * wordSize), r13);
7572   movq(Address(rsp, wordSize), r14);
7573   movq(Address(rsp, 0), r15);
7574 }
7575 
7576 void Assembler::pushq(Address src) {
7577   InstructionMark im(this);
7578   prefixq(src);
7579   emit_int8((unsigned char)0xFF);
7580   emit_operand(rsi, src);
7581 }
7582 
7583 void Assembler::rclq(Register dst, int imm8) {
7584   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7585   int encode = prefixq_and_encode(dst->encoding());
7586   if (imm8 == 1) {
7587     emit_int8((unsigned char)0xD1);
7588     emit_int8((unsigned char)(0xD0 | encode));
7589   } else {
7590     emit_int8((unsigned char)0xC1);
7591     emit_int8((unsigned char)(0xD0 | encode));
7592     emit_int8(imm8);
7593   }
7594 }
7595 
7596 void Assembler::rcrq(Register dst, int imm8) {
7597   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7598   int encode = prefixq_and_encode(dst->encoding());
7599   if (imm8 == 1) {
7600     emit_int8((unsigned char)0xD1);
7601     emit_int8((unsigned char)(0xD8 | encode));
7602   } else {
7603     emit_int8((unsigned char)0xC1);
7604     emit_int8((unsigned char)(0xD8 | encode));
7605     emit_int8(imm8);
7606   }
7607 }
7608 
7609 void Assembler::rorq(Register dst, int imm8) {
7610   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7611   int encode = prefixq_and_encode(dst->encoding());
7612   if (imm8 == 1) {
7613     emit_int8((unsigned char)0xD1);
7614     emit_int8((unsigned char)(0xC8 | encode));
7615   } else {
7616     emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xC8 | encode));
7618     emit_int8(imm8);
7619   }
7620 }
7621 
7622 void Assembler::rorxq(Register dst, Register src, int imm8) {
7623   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
7624   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A,
7625                                      /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
7626   emit_int8((unsigned char)0xF0);
7627   emit_int8((unsigned char)(0xC0 | encode));
7628   emit_int8(imm8);
7629 }
7630 
7631 void Assembler::sarq(Register dst, int imm8) {
7632   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7633   int encode = prefixq_and_encode(dst->encoding());
7634   if (imm8 == 1) {
7635     emit_int8((unsigned char)0xD1);
7636     emit_int8((unsigned char)(0xF8 | encode));
7637   } else {
7638     emit_int8((unsigned char)0xC1);
7639     emit_int8((unsigned char)(0xF8 | encode));
7640     emit_int8(imm8);
7641   }
7642 }
7643 
7644 void Assembler::sarq(Register dst) {
7645   int encode = prefixq_and_encode(dst->encoding());
7646   emit_int8((unsigned char)0xD3);
7647   emit_int8((unsigned char)(0xF8 | encode));
7648 }
7649 
7650 void Assembler::sbbq(Address dst, int32_t imm32) {
7651   InstructionMark im(this);
7652   prefixq(dst);
7653   emit_arith_operand(0x81, rbx, dst, imm32);
7654 }
7655 
7656 void Assembler::sbbq(Register dst, int32_t imm32) {
7657   (void) prefixq_and_encode(dst->encoding());
7658   emit_arith(0x81, 0xD8, dst, imm32);
7659 }
7660 
7661 void Assembler::sbbq(Register dst, Address src) {
7662   InstructionMark im(this);
7663   prefixq(src, dst);
7664   emit_int8(0x1B);
7665   emit_operand(dst, src);
7666 }
7667 
7668 void Assembler::sbbq(Register dst, Register src) {
7669   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7670   emit_arith(0x1B, 0xC0, dst, src);
7671 }
7672 
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xE0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xE0 | encode));
    emit_int8(imm8);
  }
}

void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE0 | encode));
}

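// Logical right shift: opcode extension /5. Unlike sarq/shlq, this always
// emits the 0xC1 ib form, even when the count is one (the shorter 0xD1 form
// would also be legal).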
void Assembler::shrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xC1);
  emit_int8((unsigned char)(0xE8 | encode));
  emit_int8(imm8);
}

void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
}

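// Subtract: /5 of the 0x81/0x83 immediate group (rbp, register encoding 5,
// supplies the /5 extension for the memory form), opcode 0x29 for the
// mem, reg form and 0x2B for reg, mem.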
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x29);
  emit_operand(src, dst);
}

void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}

// Force generation of a 4-byte immediate value even if it fits into 8 bits;
// this keeps the instruction length independent of the immediate's magnitude.
void Assembler::subq_imm32(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}

void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x2B);
  emit_operand(dst, src);
}

void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

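// TEST against an immediate. When the destination is rax (encoding 0), the
// dedicated opcode 0xA9 (TEST RAX, imm32) saves the ModRM byte; otherwise
// the 0xF7 /0 form is used.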
void Assembler::testq(Register dst, int32_t imm32) {
  // Not using emit_arith because test does not support
  // sign-extension of 8-bit operands.
  int encode = dst->encoding();
  if (encode == 0) {
    prefix(REX_W);
    emit_int8((unsigned char)0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode));
  }
  emit_int32(imm32);
}

void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

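// XADD (0x0F 0xC1 /r): exchange-and-add. Callers that need an atomic
// fetch-and-add typically emit this under a lock() prefix.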
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}

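// XCHG (0x87 /r). The memory form is implicitly locked by the processor, so
// no explicit lock prefix is needed for an atomic exchange.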
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}

void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
}

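// XOR: opcode 0x33 (XOR r64, r/m64). xorq(dst, dst) zeroes dst, though the
// 32-bit xorl form achieves the same zeroing (writes zero-extend) and can
// be encoded more compactly.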
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x33);
  emit_operand(dst, src);
}

#endif // !LP64