/*
 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

// A 2-D table for managing compressed displacements (disp8) on EVEX-enabled platforms.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
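
// The table is indexed as tuple_table[tuple_type + mod_idx][vector_len] and
// yields the disp8*N scaling factor N for EVEX compressed displacements.
// Illustrative walk-through (not generated code), assuming a full-vector
// EVEX_FV operand without Evex.b/Evex.w (mod_idx == 0) at 512-bit length:
//
//   int n = tuple_table[EVEX_FV + 0][AVX_512bit];  // n == 64
//   // a displacement of 320 is divisible by 64, so it can be emitted
//   // as the single disp8 byte 320 / 64 == 5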

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // Hack: call32 is too wide for the mask, so use disp32 instead.
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}
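
// Illustrative: encode(r9) returns 1 (9 - 8); the dropped high bit is
// recovered via the REX.B/REX.R/REX.X prefix bits emitted separately by the
// prefix helpers.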

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_int32(imm32);
  }
}
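
// Encoding sketch (illustrative byte sequences, not emitted by this comment):
// when the immediate fits in a signed byte the short sign-extended form is
// chosen, otherwise the full imm32 form:
//
//   emit_arith(0x81, 0xC0, rbx, 16)   => 83 C3 10            (addl rbx, 16)
//   emit_arith(0x81, 0xC0, rbx, 4096) => 81 C3 00 10 00 00   (addl rbx, 4096)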

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int8(op1);
  emit_int8(op2 | encode(dst) << 3 | encode(src));
}


bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // We will test whether the displacement fits the compressed disp8*N format
  // and, if so, apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && is_evex_inst) {
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if ((-0x80 <= new_disp && new_disp < 0x80)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return (-0x80 <= disp && disp < 0x80);
}


bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test whether the displacement fits the compressed disp8*N format
  // and, if so, apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && _is_evex_instruction) {
    switch (_tuple_type) {
    case EVEX_FV:
      if ((_evex_encoding & VEX_W) == VEX_W) {
        mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (_input_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_input_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (_avx_vector_len >= AVX_128bit && _avx_vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[_tuple_type + mod_idx][_avx_vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return is8bit(disp);
}
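
// Note (illustrative): on success the caller-visible disp is rewritten in
// place to the scaled value, e.g. a disp of 256 with a disp8*N factor of 64
// becomes 4 and is then emitted as a single byte by emit_operand() below.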


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32-bit never takes this path; it handles everything via the rip-rel/disp code above.
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
  _is_evex_instruction = false;
}
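
// Worked example (illustrative): Address(rdx, rcx, times_4, 8) with reg == rax
// takes the [base + index*scale + imm8] path above and emits
//   44 8A 08  ==  [01 000 100][10 001 010] 0x08
// i.e. ModRM 0x44 (mod=01, reg=rax, rm=100 -> SIB follows),
// SIB 0x8A (ss=10 for *4, index=rcx, base=rdx), then the disp8 byte.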

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  if (UseAVX > 2) {
    int xreg_enc = reg->encoding();
    if (xreg_enc > 15) {
      XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
      emit_operand((Register)new_reg, base, index, scale, disp, rspec);
      return;
    }
  }
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
      // fall through
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
      // fall through
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x59: // mulpd
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
      // fall through
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for the PINSRW and PEXTRW instructions,
    // but those carry the 0x0F prefix and were handled in the 0x0F case above.
    //
    // In 32-bit mode the VEX first bytes C4 and C5 alias onto the LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them, bits [7:6] are set in the VEX second byte, since a
    // ModRM byte cannot be of the form 11xxxxxx in 32-bit mode. Setting those
    // bits is achieved by storing the REX and vvvv bits inverted.
    //
    // Fortunately C2 doesn't generate these instructions, so we don't need
    // to check for them in the product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    int vex_opcode;
    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      vex_opcode = VEX_OPCODE_MASK & *ip;
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    } else {
      vex_opcode = VEX_OPCODE_0F;
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (vex_opcode) {
      case VEX_OPCODE_0F:
        switch (0xFF & *ip) {
        case 0x70: // pshufd r, r/a, #8
        case 0x71: // ps[rl|ra|ll]w r, #8
        case 0x72: // ps[rl|ra|ll]d r, #8
        case 0x73: // ps[rl|ra|ll]q r, #8
        case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
        case 0xC4: // pinsrw r, r, r/a, #8
        case 0xC5: // pextrw r/a, r, #8
        case 0xC6: // shufp[s|d] r, r, r/a, #8
          tail_size = 1;  // the imm8
          break;
        }
        break;
      case VEX_OPCODE_0F_3A:
        tail_size = 1;
        break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert((UseAVX > 0), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions: all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
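
// Usage sketch (illustrative, assumed caller): walking a code stream one
// instruction at a time, e.g. from a disassembly or patching aid:
//
//   address ip = code_begin;
//   while (ip < code_end) {
//     ip = Assembler::locate_next_instruction(ip);
//   }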


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}
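
// Illustrative (bytes, not emitted by this comment): the x87 helpers pass an
// opcode pair plus a stack slot, e.g. emit_farith(0xD8, 0xC0, 1) produces
// D8 C1, i.e. fadd st, st(1).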


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}
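
// These multi-byte 0F 1F NOP forms let padding to an alignment boundary be a
// single instruction rather than a run of 0x90 bytes, which decodes faster.
// Illustrative use (assumed caller, not from this file):
//
//   // pad 7 bytes before a loop header
//   addr_nop_7();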

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
  }
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_64bit;
    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
  }
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_32bit;
  }
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDE);
  emit_int8(0xC0 | encode);
}

void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}

void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}

void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDC);
  emit_int8(0xC0 | encode);
}

void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}

void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rsp, dst, 4);
  emit_int32(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andnl(Register dst, Register src1, Address src2) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}

void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsil(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}

void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsmskl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}

void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsrl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
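
// Illustrative arithmetic (assumed addresses): with the 0xE8 byte at code
// address 0x1000, pc() is 0x1001 after emitting it, so a call to
// entry == 0x2005 computes disp == 0x2005 - (0x1001 + 4) == 0x1000,
// emitted little-endian as 00 10 00 00.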

void Assembler::cdql() {
  emit_int8((unsigned char)0x99);
}

void Assembler::cld() {
  emit_int8((unsigned char)0xFC);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rdi, dst, 1);
  emit_int8(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_int32(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_int8(0x66);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax;
// if the values are equal, reg is stored into adr, otherwise the value at
// adr is loaded into rax. The ZF is set if the compared values were equal,
// and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}

// The 8-bit cmpxchg compares the value at adr with the low byte of rax (al);
// if the values are equal, reg is stored into adr, otherwise the value at
// adr is loaded into al. The ZF is set if the compared values were equal,
// and cleared otherwise.
void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg, true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB0);
  emit_operand(reg, adr);
}

void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 prefix is there. Strangely, ucomisd comes out correct.
1584   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1585   if (VM_Version::supports_evex()) {
1586     _tuple_type = EVEX_T1S;
1587     _input_size_in_bits = EVEX_64bit;
1588     emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
1589   } else {
1590     emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1591   }
1592 }
1593 
1594 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1595   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1596   if (VM_Version::supports_evex()) {
1597     emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
1598   } else {
1599     emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1600   }
1601 }
1602 
1603 void Assembler::comiss(XMMRegister dst, Address src) {
1604   if (VM_Version::supports_evex()) {
1605     _tuple_type = EVEX_T1S;
1606     _input_size_in_bits = EVEX_32bit;
1607   }
1608   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1609   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
1610 }
1611 
1612 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1613   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1614   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
1615 }
1616 
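// CPUID (0F A2): leaf selected by EAX (sub-leaf by ECX where applicable);
// results are returned in EAX, EBX, ECX and EDX.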
1617 void Assembler::cpuid() {
1618   emit_int8(0x0F);
1619   emit_int8((unsigned char)0xA2);
1620 }
1621 
// Opcode / Instruction                       Op/En  64-Bit Mode  Compat/Leg Mode  Description                  Implemented
// F2 0F 38 F0 / r       CRC32 r32, r / m8   RM     Valid        Valid            Accumulate CRC32 on r / m8.  v
// F2 REX 0F 38 F0 / r   CRC32 r32, r / m8*  RM     Valid        N.E.             Accumulate CRC32 on r / m8.  -
// F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8   RM     Valid        N.E.             Accumulate CRC32 on r / m8.  -
//
// F2 0F 38 F1 / r       CRC32 r32, r / m16  RM     Valid        Valid            Accumulate CRC32 on r / m16. v
//
// F2 0F 38 F1 / r       CRC32 r32, r / m32  RM     Valid        Valid            Accumulate CRC32 on r / m32. v
//
// F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64  RM     Valid        N.E.             Accumulate CRC32 on r / m64. v
1632 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1633   assert(VM_Version::supports_sse4_2(), "");
1634   int8_t w = 0x01;
1635   Prefix p = Prefix_EMPTY;
1636 
1637   emit_int8((int8_t)0xF2);
1638   switch (sizeInBytes) {
1639   case 1:
1640     w = 0;
1641     break;
1642   case 2:
1643   case 4:
1644     break;
1645   LP64_ONLY(case 8:)
    // This instruction is not valid in 32-bit mode
    // Note:
    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
    //
    // Page B-72, Vol. 2C says
    // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
    // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
    //                                                                            F0!!!
    // while page 3-208, Vol. 2A says
    // F2 REX.W 0F 38 F1 / r     CRC32 r64, r / m64    RM    Valid    N.E.    Accumulate CRC32 on r / m64.
    //
    // the 0 in the last bit is reserved for a different flavor of this instruction:
    // F2 REX.W 0F 38 F0 / r     CRC32 r64, r / m8     RM    Valid    N.E.    Accumulate CRC32 on r / m8.
1659     p = REX_W;
1660     break;
1661   default:
1662     assert(0, "Unsupported value for a sizeInBytes argument");
1663     break;
1664   }
1665   LP64_ONLY(prefix(crc, v, p);)
1666   emit_int8((int8_t)0x0F);
1667   emit_int8(0x38);
1668   emit_int8((int8_t)(0xF0 | w));
1669   emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
1670 }
1671 
1672 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1673   assert(VM_Version::supports_sse4_2(), "");
1674   InstructionMark im(this);
1675   int8_t w = 0x01;
1676   Prefix p = Prefix_EMPTY;
1677 
1678   emit_int8((int8_t)0xF2);
1679   switch (sizeInBytes) {
1680   case 1:
1681     w = 0;
1682     break;
1683   case 2:
1684   case 4:
1685     break;
1686   LP64_ONLY(case 8:)
    // This instruction is not valid in 32-bit mode
1688     p = REX_W;
1689     break;
1690   default:
1691     assert(0, "Unsupported value for a sizeInBytes argument");
1692     break;
1693   }
1694   LP64_ONLY(prefix(crc, adr, p);)
1695   emit_int8((int8_t)0x0F);
1696   emit_int8(0x38);
1697   emit_int8((int8_t)(0xF0 | w));
1698   emit_operand(crc, adr);
1699 }
1700 
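// The cvt* conversions below follow the SSE/SSE2 scheme: the mandatory
// prefix picks the data type (none/66/F3/F2) and the 0F-map opcode picks
// the operation (2A int->fp, 2C truncating fp->int, 5A/5B/E6 fp<->fp/int).
// The 'l'/'q' name suffixes denote 32-bit vs 64-bit integer operands.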
1701 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1702   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1703   emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true);
1704 }
1705 
1706 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1707   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1708   emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ true);
1709 }
1710 
1711 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1712   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1713   if (VM_Version::supports_evex()) {
1714     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1715   } else {
1716     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1717   }
1718 }
1719 
1720 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1721   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1722   if (VM_Version::supports_evex()) {
1723     _tuple_type = EVEX_T1F;
1724     _input_size_in_bits = EVEX_64bit;
1725     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1726   } else {
1727     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1728   }
1729 }
1730 
1731 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1732   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1733   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VM_Version::supports_evex());
1734   emit_int8(0x2A);
1735   emit_int8((unsigned char)(0xC0 | encode));
1736 }
1737 
1738 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1739   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1740   if (VM_Version::supports_evex()) {
1741     _tuple_type = EVEX_T1S;
1742     _input_size_in_bits = EVEX_32bit;
1743     emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
1744   } else {
1745     emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
1746   }
1747 }
1748 
1749 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1750   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1751   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
1752   emit_int8(0x2A);
1753   emit_int8((unsigned char)(0xC0 | encode));
1754 }
1755 
1756 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1757   if (VM_Version::supports_evex()) {
1758     _tuple_type = EVEX_T1S;
1759     _input_size_in_bits = EVEX_32bit;
1760   }
1761   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1762   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
1763 }
1764 
1765 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1766   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1767   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
1768   emit_int8(0x2A);
1769   emit_int8((unsigned char)(0xC0 | encode));
1770 }
1771 
1772 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1773   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1774   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1775 }
1776 
1777 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1778   if (VM_Version::supports_evex()) {
1779     _tuple_type = EVEX_T1S;
1780     _input_size_in_bits = EVEX_32bit;
1781   }
1782   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1783   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1784 }
1785 
1786 
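// CVTTSD2SI / CVTTSS2SI (F2/F3 0F 2C): convert with truncation toward zero.
// On overflow or NaN the hardware returns the integer indefinite value
// 0x80000000; callers must fix that up where Java semantics require it.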
1787 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1788   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1789   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
1790   emit_int8(0x2C);
1791   emit_int8((unsigned char)(0xC0 | encode));
1792 }
1793 
1794 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1795   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1796   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
1797   emit_int8(0x2C);
1798   emit_int8((unsigned char)(0xC0 | encode));
1799 }
1800 
1801 void Assembler::decl(Address dst) {
1802   // Don't use it directly. Use MacroAssembler::decrement() instead.
1803   InstructionMark im(this);
1804   prefix(dst);
1805   emit_int8((unsigned char)0xFF);
1806   emit_operand(rcx, dst);
1807 }
1808 
1809 void Assembler::divsd(XMMRegister dst, Address src) {
1810   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1811   if (VM_Version::supports_evex()) {
1812     _tuple_type = EVEX_T1S;
1813     _input_size_in_bits = EVEX_64bit;
1814     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1815   } else {
1816     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1817   }
1818 }
1819 
1820 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1821   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1822   if (VM_Version::supports_evex()) {
1823     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1824   } else {
1825     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1826   }
1827 }
1828 
1829 void Assembler::divss(XMMRegister dst, Address src) {
1830   if (VM_Version::supports_evex()) {
1831     _tuple_type = EVEX_T1S;
1832     _input_size_in_bits = EVEX_32bit;
1833   }
1834   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1835   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1836 }
1837 
1838 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1839   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1840   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1841 }
1842 
1843 void Assembler::emms() {
1844   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1845   emit_int8(0x0F);
1846   emit_int8(0x77);
1847 }
1848 
1849 void Assembler::hlt() {
1850   emit_int8((unsigned char)0xF4);
1851 }
1852 
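// IDIV (F7 /7) and DIV (F7 /6) divide edx:eax by the operand, leaving the
// quotient in eax and the remainder in edx; callers must sign- or
// zero-extend eax into edx beforehand.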
1853 void Assembler::idivl(Register src) {
1854   int encode = prefix_and_encode(src->encoding());
1855   emit_int8((unsigned char)0xF7);
1856   emit_int8((unsigned char)(0xF8 | encode));
1857 }
1858 
1859 void Assembler::divl(Register src) { // Unsigned
1860   int encode = prefix_and_encode(src->encoding());
1861   emit_int8((unsigned char)0xF7);
1862   emit_int8((unsigned char)(0xF0 | encode));
1863 }
1864 
1865 void Assembler::imull(Register dst, Register src) {
1866   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1867   emit_int8(0x0F);
1868   emit_int8((unsigned char)0xAF);
1869   emit_int8((unsigned char)(0xC0 | encode));
1870 }
1871 
1872 
1873 void Assembler::imull(Register dst, Register src, int value) {
1874   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1875   if (is8bit(value)) {
1876     emit_int8(0x6B);
1877     emit_int8((unsigned char)(0xC0 | encode));
1878     emit_int8(value & 0xFF);
1879   } else {
1880     emit_int8(0x69);
1881     emit_int8((unsigned char)(0xC0 | encode));
1882     emit_int32(value);
1883   }
1884 }
1885 
1886 void Assembler::imull(Register dst, Address src) {
1887   InstructionMark im(this);
1888   prefix(src, dst);
1889   emit_int8(0x0F);
1890   emit_int8((unsigned char) 0xAF);
1891   emit_operand(dst, src);
1892 }
1893 
1894 
1895 void Assembler::incl(Address dst) {
1896   // Don't use it directly. Use MacroAssembler::increment() instead.
1897   InstructionMark im(this);
1898   prefix(dst);
1899   emit_int8((unsigned char)0xFF);
1900   emit_operand(rax, dst);
1901 }
1902 
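// Jcc: emits 0x70|cc with a rel8 displacement when the bound target is in
// range (and maybe_short allows it), otherwise 0F 80|cc with a rel32.
// Unbound forward references always get the long form and are patched once
// the label is bound.
// Minimal usage sketch (illustrative only):
//   Label done;
//   cmpl(rax, 0);
//   jcc(Assembler::equal, done);
//   ... // code skipped when rax == 0
//   bind(done);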
1903 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1904   InstructionMark im(this);
1905   assert((0 <= cc) && (cc < 16), "illegal cc");
1906   if (L.is_bound()) {
1907     address dst = target(L);
1908     assert(dst != NULL, "jcc most probably wrong");
1909 
1910     const int short_size = 2;
1911     const int long_size = 6;
1912     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
1913     if (maybe_short && is8bit(offs - short_size)) {
1914       // 0111 tttn #8-bit disp
1915       emit_int8(0x70 | cc);
1916       emit_int8((offs - short_size) & 0xFF);
1917     } else {
1918       // 0000 1111 1000 tttn #32-bit disp
1919       assert(is_simm32(offs - long_size),
1920              "must be 32bit offset (call4)");
1921       emit_int8(0x0F);
1922       emit_int8((unsigned char)(0x80 | cc));
1923       emit_int32(offs - long_size);
1924     }
1925   } else {
    // Note: could eliminate cond. jumps to this jump if condition
    //       is the same; however, that seems to be a rather unlikely case.
    // Note: use jccb() if the label to be bound is very close, to get
    //       an 8-bit displacement.
1930     L.add_patch_at(code(), locator());
1931     emit_int8(0x0F);
1932     emit_int8((unsigned char)(0x80 | cc));
1933     emit_int32(0);
1934   }
1935 }
1936 
1937 void Assembler::jccb(Condition cc, Label& L) {
1938   if (L.is_bound()) {
1939     const int short_size = 2;
1940     address entry = target(L);
1941 #ifdef ASSERT
1942     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1943     intptr_t delta = short_branch_delta();
1944     if (delta != 0) {
1945       dist += (dist < 0 ? (-delta) :delta);
1946     }
    assert(is8bit(dist), "Displacement too large for a short jmp");
1948 #endif
1949     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
1950     // 0111 tttn #8-bit disp
1951     emit_int8(0x70 | cc);
1952     emit_int8((offs - short_size) & 0xFF);
1953   } else {
1954     InstructionMark im(this);
1955     L.add_patch_at(code(), locator());
1956     emit_int8(0x70 | cc);
1957     emit_int8(0);
1958   }
1959 }
1960 
1961 void Assembler::jmp(Address adr) {
1962   InstructionMark im(this);
1963   prefix(adr);
1964   emit_int8((unsigned char)0xFF);
1965   emit_operand(rsp, adr);
1966 }
1967 
1968 void Assembler::jmp(Label& L, bool maybe_short) {
1969   if (L.is_bound()) {
1970     address entry = target(L);
1971     assert(entry != NULL, "jmp most probably wrong");
1972     InstructionMark im(this);
1973     const int short_size = 2;
1974     const int long_size = 5;
1975     intptr_t offs = entry - pc();
1976     if (maybe_short && is8bit(offs - short_size)) {
1977       emit_int8((unsigned char)0xEB);
1978       emit_int8((offs - short_size) & 0xFF);
1979     } else {
1980       emit_int8((unsigned char)0xE9);
1981       emit_int32(offs - long_size);
1982     }
1983   } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not run beyond 127 bytes, use jmpb to
    // force an 8-bit displacement.
1988     InstructionMark im(this);
1989     L.add_patch_at(code(), locator());
1990     emit_int8((unsigned char)0xE9);
1991     emit_int32(0);
1992   }
1993 }
1994 
1995 void Assembler::jmp(Register entry) {
1996   int encode = prefix_and_encode(entry->encoding());
1997   emit_int8((unsigned char)0xFF);
1998   emit_int8((unsigned char)(0xE0 | encode));
1999 }
2000 
2001 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2002   InstructionMark im(this);
2003   emit_int8((unsigned char)0xE9);
2004   assert(dest != NULL, "must have a target");
2005   intptr_t disp = dest - (pc() + sizeof(int32_t));
2006   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2007   emit_data(disp, rspec.reloc(), call32_operand);
2008 }
2009 
2010 void Assembler::jmpb(Label& L) {
2011   if (L.is_bound()) {
2012     const int short_size = 2;
2013     address entry = target(L);
2014     assert(entry != NULL, "jmp most probably wrong");
2015 #ifdef ASSERT
2016     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2017     intptr_t delta = short_branch_delta();
2018     if (delta != 0) {
2019       dist += (dist < 0 ? (-delta) :delta);
2020     }
    assert(is8bit(dist), "Displacement too large for a short jmp");
2022 #endif
2023     intptr_t offs = entry - pc();
2024     emit_int8((unsigned char)0xEB);
2025     emit_int8((offs - short_size) & 0xFF);
2026   } else {
2027     InstructionMark im(this);
2028     L.add_patch_at(code(), locator());
2029     emit_int8((unsigned char)0xEB);
2030     emit_int8(0);
2031   }
2032 }
2033 
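// LDMXCSR (0F AE /2): load the MXCSR control/status register from memory.
// The /2 opcode extension is encoded by passing as_Register(2) as the reg
// field of the ModRM byte.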
2034 void Assembler::ldmxcsr( Address src) {
2035   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2036   InstructionMark im(this);
2037   prefix(src);
2038   emit_int8(0x0F);
2039   emit_int8((unsigned char)0xAE);
2040   emit_operand(as_Register(2), src);
2041 }
2042 
2043 void Assembler::leal(Register dst, Address src) {
2044   InstructionMark im(this);
2045 #ifdef _LP64
2046   emit_int8(0x67); // addr32
2047   prefix(src, dst);
2048 #endif // LP64
2049   emit_int8((unsigned char)0x8D);
2050   emit_operand(dst, src);
2051 }
2052 
2053 void Assembler::lfence() {
2054   emit_int8(0x0F);
2055   emit_int8((unsigned char)0xAE);
2056   emit_int8((unsigned char)0xE8);
2057 }
2058 
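// LOCK prefix (0xF0): makes the following read-modify-write instruction
// (e.g. cmpxchg, xadd) atomic with respect to other processors.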
2059 void Assembler::lock() {
2060   emit_int8((unsigned char)0xF0);
2061 }
2062 
2063 void Assembler::lzcntl(Register dst, Register src) {
2064   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2065   emit_int8((unsigned char)0xF3);
2066   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2067   emit_int8(0x0F);
2068   emit_int8((unsigned char)0xBD);
2069   emit_int8((unsigned char)(0xC0 | encode));
2070 }
2071 
// Emit mfence instruction (0F AE F0), a full memory fence
2073 void Assembler::mfence() {
2074   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2075   emit_int8(0x0F);
2076   emit_int8((unsigned char)0xAE);
2077   emit_int8((unsigned char)0xF0);
2078 }
2079 
2080 void Assembler::mov(Register dst, Register src) {
2081   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2082 }
2083 
2084 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2085   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2086   if (VM_Version::supports_avx512novl()) {
2087     int vector_len = AVX_512bit;
2088     int dst_enc = dst->encoding();
2089     int src_enc = src->encoding();
2090     int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F,
2091                                        /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
2092     emit_int8(0x28);
2093     emit_int8((unsigned char)(0xC0 | encode));
2094   } else if (VM_Version::supports_evex()) {
2095     emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66);
2096   } else {
2097     emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
2098   }
2099 }
2100 
2101 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2102   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2103   if (VM_Version::supports_avx512novl()) {
2104     int vector_len = AVX_512bit;
2105     int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, vector_len);
2106     emit_int8(0x28);
2107     emit_int8((unsigned char)(0xC0 | encode));
2108   } else {
2109     emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
2110   }
2111 }
2112 
2113 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2114   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2115   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
2116   emit_int8(0x16);
2117   emit_int8((unsigned char)(0xC0 | encode));
2118 }
2119 
2120 void Assembler::movb(Register dst, Address src) {
2121   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2122   InstructionMark im(this);
2123   prefix(src, dst, true);
2124   emit_int8((unsigned char)0x8A);
2125   emit_operand(dst, src);
2126 }
2127 
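// kmov*: moves between AVX-512 opmask registers (k0-k7), general registers
// and memory; the w/d/q variants differ in how many mask bits they transfer.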
2128 void Assembler::kmovql(KRegister dst, KRegister src) {
2129   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2130   int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE,
2131                                       /* no_mask_reg */ true, VEX_OPCODE_0F, /* rex_w */ true);
2132   emit_int8((unsigned char)0x90);
2133   emit_int8((unsigned char)(0xC0 | encode));
2134 }
2135 
2136 void Assembler::kmovql(KRegister dst, Address src) {
2137   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2138   int dst_enc = dst->encoding();
2139   int nds_enc = 0;
2140   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE,
2141              VEX_OPCODE_0F, /* vex_w */  true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
2142   emit_int8((unsigned char)0x90);
2143   emit_operand((Register)dst, src);
2144 }
2145 
2146 void Assembler::kmovql(Address dst, KRegister src) {
2147   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2148   int src_enc = src->encoding();
2149   int nds_enc = 0;
2150   vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE,
2151              VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
2152   emit_int8((unsigned char)0x90);
2153   emit_operand((Register)src, dst);
2154 }
2155 
2156 void Assembler::kmovql(KRegister dst, Register src) {
2157   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2158   VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
2159   int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true,
2160                                       VEX_OPCODE_0F, /* legacy_mode */ !_legacy_mode_bw);
2161   emit_int8((unsigned char)0x92);
2162   emit_int8((unsigned char)(0xC0 | encode));
2163 }
2164 
2165 void Assembler::kmovdl(KRegister dst, Register src) {
2166   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2167   VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
2168   int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true);
2169   emit_int8((unsigned char)0x92);
2170   emit_int8((unsigned char)(0xC0 | encode));
2171 }
2172 
2173 void Assembler::kmovwl(KRegister dst, Register src) {
2174   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2175   int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
2176   emit_int8((unsigned char)0x92);
2177   emit_int8((unsigned char)(0xC0 | encode));
2178 }
2179 
2180 void Assembler::movb(Address dst, int imm8) {
2181   InstructionMark im(this);
  prefix(dst);
2183   emit_int8((unsigned char)0xC6);
2184   emit_operand(rax, dst, 1);
2185   emit_int8(imm8);
2186 }
2187 
2188 
2189 void Assembler::movb(Address dst, Register src) {
2190   assert(src->has_byte_register(), "must have byte register");
2191   InstructionMark im(this);
2192   prefix(dst, src, true);
2193   emit_int8((unsigned char)0x88);
2194   emit_operand(src, dst);
2195 }
2196 
2197 void Assembler::movdl(XMMRegister dst, Register src) {
2198   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2199   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2200   emit_int8(0x6E);
2201   emit_int8((unsigned char)(0xC0 | encode));
2202 }
2203 
2204 void Assembler::movdl(Register dst, XMMRegister src) {
2205   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2206   // swap src/dst to get correct prefix
2207   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
2208   emit_int8(0x7E);
2209   emit_int8((unsigned char)(0xC0 | encode));
2210 }
2211 
2212 void Assembler::movdl(XMMRegister dst, Address src) {
2213   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2214   if (VM_Version::supports_evex()) {
2215     _tuple_type = EVEX_T1S;
2216     _input_size_in_bits = EVEX_32bit;
2217   }
2218   InstructionMark im(this);
2219   simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
2220   emit_int8(0x6E);
2221   emit_operand(dst, src);
2222 }
2223 
2224 void Assembler::movdl(Address dst, XMMRegister src) {
2225   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2226   if (VM_Version::supports_evex()) {
2227     _tuple_type = EVEX_T1S;
2228     _input_size_in_bits = EVEX_32bit;
2229   }
2230   InstructionMark im(this);
2231   simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
2232   emit_int8(0x7E);
2233   emit_operand(src, dst);
2234 }
2235 
2236 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2237   _instruction_uses_vl = true;
2238   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2239   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2240 }
2241 
2242 void Assembler::movdqa(XMMRegister dst, Address src) {
2243   _instruction_uses_vl = true;
2244   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2245   if (VM_Version::supports_evex()) {
2246     _tuple_type = EVEX_FVM;
2247   }
2248   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2249 }
2250 
2251 void Assembler::movdqu(XMMRegister dst, Address src) {
2252   _instruction_uses_vl = true;
2253   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2254   if (VM_Version::supports_evex()) {
2255     _tuple_type = EVEX_FVM;
2256   }
2257   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2258 }
2259 
2260 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2261   _instruction_uses_vl = true;
2262   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2263   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2264 }
2265 
2266 void Assembler::movdqu(Address dst, XMMRegister src) {
2267   _instruction_uses_vl = true;
2268   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2269   if (VM_Version::supports_evex()) {
2270     _tuple_type = EVEX_FVM;
2271   }
2272   InstructionMark im(this);
2273   simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
2274   emit_int8(0x7F);
2275   emit_operand(src, dst);
2276 }
2277 
2278 // Move Unaligned 256bit Vector
2279 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2280   _instruction_uses_vl = true;
2281   assert(UseAVX > 0, "");
2282   int vector_len = AVX_256bit;
2283   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2284   emit_int8(0x6F);
2285   emit_int8((unsigned char)(0xC0 | encode));
2286 }
2287 
2288 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2289   _instruction_uses_vl = true;
2290   assert(UseAVX > 0, "");
2291   if (VM_Version::supports_evex()) {
2292     _tuple_type = EVEX_FVM;
2293   }
2294   InstructionMark im(this);
2295   int vector_len = AVX_256bit;
2296   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2297   emit_int8(0x6F);
2298   emit_operand(dst, src);
2299 }
2300 
2301 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2302   _instruction_uses_vl = true;
2303   assert(UseAVX > 0, "");
2304   if (VM_Version::supports_evex()) {
2305     _tuple_type = EVEX_FVM;
2306   }
2307   InstructionMark im(this);
2308   int vector_len = AVX_256bit;
2309   // swap src<->dst for encoding
2310   assert(src != xnoreg, "sanity");
2311   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
2312   emit_int8(0x7F);
2313   emit_operand(src, dst);
2314 }
2315 
// Move Unaligned EVEX-enabled Vector (programmable element size: 8, 16, 32 or 64 bits)
2317 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2318   _instruction_uses_vl = true;
2319   assert(UseAVX > 0, "");
2320   int src_enc = src->encoding();
2321   int dst_enc = dst->encoding();
2322   int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
2323                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
2324   emit_int8(0x6F);
2325   emit_int8((unsigned char)(0xC0 | encode));
2326 }
2327 
2328 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2329   _instruction_uses_vl = true;
2330   assert(UseAVX > 0, "");
2331   InstructionMark im(this);
2332   if (VM_Version::supports_evex()) {
2333     _tuple_type = EVEX_FVM;
2334   }
2335   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2336   emit_int8(0x6F);
2337   emit_operand(dst, src);
2338 }
2339 
2340 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2341   _instruction_uses_vl = true;
2342   assert(UseAVX > 0, "");
2343   InstructionMark im(this);
2344   assert(src != xnoreg, "sanity");
2345   if (VM_Version::supports_evex()) {
2346     _tuple_type = EVEX_FVM;
2347   }
2348   // swap src<->dst for encoding
2349   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
2350   emit_int8(0x7F);
2351   emit_operand(src, dst);
2352 }
2353 
2354 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2355   _instruction_uses_vl = true;
2356   assert(UseAVX > 0, "");
2357   int src_enc = src->encoding();
2358   int dst_enc = dst->encoding();
2359   int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
2360                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
2361   emit_int8(0x6F);
2362   emit_int8((unsigned char)(0xC0 | encode));
2363 }
2364 
2365 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2366   _instruction_uses_vl = true;
2367   assert(UseAVX > 2, "");
2368   InstructionMark im(this);
2369   _tuple_type = EVEX_FVM;
2370   vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2371   emit_int8(0x6F);
2372   emit_operand(dst, src);
2373 }
2374 
2375 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2376   _instruction_uses_vl = true;
2377   assert(UseAVX > 2, "");
2378   InstructionMark im(this);
2379   assert(src != xnoreg, "sanity");
2380   _tuple_type = EVEX_FVM;
2381   // swap src<->dst for encoding
2382   vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
2383   emit_int8(0x7F);
2384   emit_operand(src, dst);
2385 }
2386 
// Uses zero extension on 64-bit
2388 
2389 void Assembler::movl(Register dst, int32_t imm32) {
2390   int encode = prefix_and_encode(dst->encoding());
2391   emit_int8((unsigned char)(0xB8 | encode));
2392   emit_int32(imm32);
2393 }
2394 
2395 void Assembler::movl(Register dst, Register src) {
2396   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2397   emit_int8((unsigned char)0x8B);
2398   emit_int8((unsigned char)(0xC0 | encode));
2399 }
2400 
2401 void Assembler::movl(Register dst, Address src) {
2402   InstructionMark im(this);
2403   prefix(src, dst);
2404   emit_int8((unsigned char)0x8B);
2405   emit_operand(dst, src);
2406 }
2407 
2408 void Assembler::movl(Address dst, int32_t imm32) {
2409   InstructionMark im(this);
2410   prefix(dst);
2411   emit_int8((unsigned char)0xC7);
2412   emit_operand(rax, dst, 4);
2413   emit_int32(imm32);
2414 }
2415 
2416 void Assembler::movl(Address dst, Register src) {
2417   InstructionMark im(this);
2418   prefix(dst, src);
2419   emit_int8((unsigned char)0x89);
2420   emit_operand(src, dst);
2421 }
2422 
// Newer CPUs require the use of movsd and movss to avoid partial register stalls
// when loading from memory. But for old Opterons, use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
2426 void Assembler::movlpd(XMMRegister dst, Address src) {
2427   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2428   if (VM_Version::supports_evex()) {
2429     _tuple_type = EVEX_T1S;
2430     _input_size_in_bits = EVEX_32bit;
2431     emit_simd_arith_q(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2432   } else {
2433     emit_simd_arith(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2434   }
2435 }
2436 
2437 void Assembler::movq( MMXRegister dst, Address src ) {
2438   assert( VM_Version::supports_mmx(), "" );
2439   emit_int8(0x0F);
2440   emit_int8(0x6F);
2441   emit_operand(dst, src);
2442 }
2443 
2444 void Assembler::movq( Address dst, MMXRegister src ) {
2445   assert( VM_Version::supports_mmx(), "" );
2446   emit_int8(0x0F);
2447   emit_int8(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address),
  // gcc will tail jump and try to reverse the parameters completely,
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump, the bug is
  // avoided.
2454   emit_operand(dst, src);
2455 }
2456 
2457 void Assembler::movq(XMMRegister dst, Address src) {
2458   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2459   InstructionMark im(this);
2460   if (VM_Version::supports_evex()) {
2461     _tuple_type = EVEX_T1S;
2462     _input_size_in_bits = EVEX_64bit;
2463     simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2464   } else {
2465     simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2466   }
2467   emit_int8(0x7E);
2468   emit_operand(dst, src);
2469 }
2470 
2471 void Assembler::movq(Address dst, XMMRegister src) {
2472   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2473   InstructionMark im(this);
2474   if (VM_Version::supports_evex()) {
2475     _tuple_type = EVEX_T1S;
2476     _input_size_in_bits = EVEX_64bit;
2477     simd_prefix(src, xnoreg, dst, VEX_SIMD_66, /* no_mask_reg */ true,
2478                 VEX_OPCODE_0F, /* rex_w */ true);
2479   } else {
2480     simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2481   }
2482   emit_int8((unsigned char)0xD6);
2483   emit_operand(src, dst);
2484 }
2485 
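// MOVSX (0F BE/BF) sign-extends a byte/word into a 32-bit register;
// MOVZX (0F B6/B7, further below) zero-extends.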
2486 void Assembler::movsbl(Register dst, Address src) { // movsxb
2487   InstructionMark im(this);
2488   prefix(src, dst);
2489   emit_int8(0x0F);
2490   emit_int8((unsigned char)0xBE);
2491   emit_operand(dst, src);
2492 }
2493 
2494 void Assembler::movsbl(Register dst, Register src) { // movsxb
2495   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2496   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2497   emit_int8(0x0F);
2498   emit_int8((unsigned char)0xBE);
2499   emit_int8((unsigned char)(0xC0 | encode));
2500 }
2501 
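// MOVSD (F2 0F 10/11): scalar double move. The reg-reg form merges into the
// upper bits of dst; the load form zeroes them.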
2502 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2503   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2504   if (VM_Version::supports_evex()) {
2505     emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
2506   } else {
2507     emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
2508   }
2509 }
2510 
2511 void Assembler::movsd(XMMRegister dst, Address src) {
2512   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2513   if (VM_Version::supports_evex()) {
2514     _tuple_type = EVEX_T1S;
2515     _input_size_in_bits = EVEX_64bit;
2516     emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
2517   } else {
2518     emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
2519   }
2520 }
2521 
2522 void Assembler::movsd(Address dst, XMMRegister src) {
2523   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2524   InstructionMark im(this);
2525   if (VM_Version::supports_evex()) {
2526     _tuple_type = EVEX_T1S;
2527     _input_size_in_bits = EVEX_64bit;
2528     simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2);
2529   } else {
2530     simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, /* no_mask_reg */ false);
2531   }
2532   emit_int8(0x11);
2533   emit_operand(src, dst);
2534 }
2535 
2536 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2537   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2538   emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2539 }
2540 
2541 void Assembler::movss(XMMRegister dst, Address src) {
2542   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2543   if (VM_Version::supports_evex()) {
2544     _tuple_type = EVEX_T1S;
2545     _input_size_in_bits = EVEX_32bit;
2546   }
2547   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2548 }
2549 
2550 void Assembler::movss(Address dst, XMMRegister src) {
2551   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2552   if (VM_Version::supports_evex()) {
2553     _tuple_type = EVEX_T1S;
2554     _input_size_in_bits = EVEX_32bit;
2555   }
2556   InstructionMark im(this);
2557   simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
2558   emit_int8(0x11);
2559   emit_operand(src, dst);
2560 }
2561 
2562 void Assembler::movswl(Register dst, Address src) { // movsxw
2563   InstructionMark im(this);
2564   prefix(src, dst);
2565   emit_int8(0x0F);
2566   emit_int8((unsigned char)0xBF);
2567   emit_operand(dst, src);
2568 }
2569 
2570 void Assembler::movswl(Register dst, Register src) { // movsxw
2571   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2572   emit_int8(0x0F);
2573   emit_int8((unsigned char)0xBF);
2574   emit_int8((unsigned char)(0xC0 | encode));
2575 }
2576 
2577 void Assembler::movw(Address dst, int imm16) {
2578   InstructionMark im(this);
2579 
  emit_int8(0x66); // operand-size prefix: 16-bit operand
2581   prefix(dst);
2582   emit_int8((unsigned char)0xC7);
2583   emit_operand(rax, dst, 2);
2584   emit_int16(imm16);
2585 }
2586 
2587 void Assembler::movw(Register dst, Address src) {
2588   InstructionMark im(this);
2589   emit_int8(0x66);
2590   prefix(src, dst);
2591   emit_int8((unsigned char)0x8B);
2592   emit_operand(dst, src);
2593 }
2594 
2595 void Assembler::movw(Address dst, Register src) {
2596   InstructionMark im(this);
2597   emit_int8(0x66);
2598   prefix(dst, src);
2599   emit_int8((unsigned char)0x89);
2600   emit_operand(src, dst);
2601 }
2602 
2603 void Assembler::movzbl(Register dst, Address src) { // movzxb
2604   InstructionMark im(this);
2605   prefix(src, dst);
2606   emit_int8(0x0F);
2607   emit_int8((unsigned char)0xB6);
2608   emit_operand(dst, src);
2609 }
2610 
2611 void Assembler::movzbl(Register dst, Register src) { // movzxb
2612   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2613   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2614   emit_int8(0x0F);
2615   emit_int8((unsigned char)0xB6);
2616   emit_int8(0xC0 | encode);
2617 }
2618 
2619 void Assembler::movzwl(Register dst, Address src) { // movzxw
2620   InstructionMark im(this);
2621   prefix(src, dst);
2622   emit_int8(0x0F);
2623   emit_int8((unsigned char)0xB7);
2624   emit_operand(dst, src);
2625 }
2626 
2627 void Assembler::movzwl(Register dst, Register src) { // movzxw
2628   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2629   emit_int8(0x0F);
2630   emit_int8((unsigned char)0xB7);
2631   emit_int8(0xC0 | encode);
2632 }
2633 
2634 void Assembler::mull(Address src) {
2635   InstructionMark im(this);
2636   prefix(src);
2637   emit_int8((unsigned char)0xF7);
2638   emit_operand(rsp, src);
2639 }
2640 
2641 void Assembler::mull(Register src) {
2642   int encode = prefix_and_encode(src->encoding());
2643   emit_int8((unsigned char)0xF7);
2644   emit_int8((unsigned char)(0xE0 | encode));
2645 }
2646 
2647 void Assembler::mulsd(XMMRegister dst, Address src) {
2648   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2649   if (VM_Version::supports_evex()) {
2650     _tuple_type = EVEX_T1S;
2651     _input_size_in_bits = EVEX_64bit;
2652     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2653   } else {
2654     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2655   }
2656 }
2657 
2658 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2659   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2660   if (VM_Version::supports_evex()) {
2661     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2662   } else {
2663     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2664   }
2665 }
2666 
2667 void Assembler::mulss(XMMRegister dst, Address src) {
2668   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2669   if (VM_Version::supports_evex()) {
2670     _tuple_type = EVEX_T1S;
2671     _input_size_in_bits = EVEX_32bit;
2672   }
2673   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2674 }
2675 
2676 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2677   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2678   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2679 }
2680 
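// NEG (F7 /3) is two's-complement negation; NOT (F7 /2, below) is the
// one's-complement; both select the operation via the ModRM reg field.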
2681 void Assembler::negl(Register dst) {
2682   int encode = prefix_and_encode(dst->encoding());
2683   emit_int8((unsigned char)0xF7);
2684   emit_int8((unsigned char)(0xD8 | encode));
2685 }
2686 
2687 void Assembler::nop(int i) {
2688 #ifdef ASSERT
2689   assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers, making it a
  // pain to disassemble code while debugging. If asserts are on, clearly
  // speed is not an issue, so simply use the traditional single-byte nop
  // for alignment.
2694 
2695   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2696   return;
2697 
2698 #endif // ASSERT
2699 
2700   if (UseAddressNop && VM_Version::is_intel()) {
2701     //
    // Using multi-byte nops "0x0F 0x1F [address]" for Intel
2703     //  1: 0x90
2704     //  2: 0x66 0x90
2705     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2706     //  4: 0x0F 0x1F 0x40 0x00
2707     //  5: 0x0F 0x1F 0x44 0x00 0x00
2708     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2709     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2710     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2711     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2712     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2713     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2714 
    // The remaining encodings are Intel-specific - don't use consecutive address nops
2716 
2717     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2718     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2719     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2720     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2721 
    while (i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
2724       i -= 15;
2725       emit_int8(0x66);   // size prefix
2726       emit_int8(0x66);   // size prefix
2727       emit_int8(0x66);   // size prefix
2728       addr_nop_8();
2729       emit_int8(0x66);   // size prefix
2730       emit_int8(0x66);   // size prefix
2731       emit_int8(0x66);   // size prefix
2732       emit_int8((unsigned char)0x90);
2733                          // nop
2734     }
2735     switch (i) {
2736       case 14:
2737         emit_int8(0x66); // size prefix
2738       case 13:
2739         emit_int8(0x66); // size prefix
2740       case 12:
2741         addr_nop_8();
2742         emit_int8(0x66); // size prefix
2743         emit_int8(0x66); // size prefix
2744         emit_int8(0x66); // size prefix
2745         emit_int8((unsigned char)0x90);
2746                          // nop
2747         break;
2748       case 11:
2749         emit_int8(0x66); // size prefix
2750       case 10:
2751         emit_int8(0x66); // size prefix
2752       case 9:
2753         emit_int8(0x66); // size prefix
2754       case 8:
2755         addr_nop_8();
2756         break;
2757       case 7:
2758         addr_nop_7();
2759         break;
2760       case 6:
2761         emit_int8(0x66); // size prefix
2762       case 5:
2763         addr_nop_5();
2764         break;
2765       case 4:
2766         addr_nop_4();
2767         break;
2768       case 3:
2769         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2770         emit_int8(0x66); // size prefix
2771       case 2:
2772         emit_int8(0x66); // size prefix
2773       case 1:
2774         emit_int8((unsigned char)0x90);
2775                          // nop
2776         break;
2777       default:
2778         assert(i == 0, " ");
2779     }
2780     return;
2781   }
2782   if (UseAddressNop && VM_Version::is_amd()) {
2783     //
    // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
2785     //  1: 0x90
2786     //  2: 0x66 0x90
2787     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2788     //  4: 0x0F 0x1F 0x40 0x00
2789     //  5: 0x0F 0x1F 0x44 0x00 0x00
2790     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2791     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2792     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2793     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2794     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2795     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2796 
    // The remaining encodings are AMD-specific - use consecutive address nops
2798 
2799     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2800     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2801     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2802     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2803     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2804     //     Size prefixes (0x66) are added for larger sizes
2805 
    while (i >= 22) {
2807       i -= 11;
2808       emit_int8(0x66); // size prefix
2809       emit_int8(0x66); // size prefix
2810       emit_int8(0x66); // size prefix
2811       addr_nop_8();
2812     }
    // Generate the first nop for sizes between 21 and 12
2814     switch (i) {
2815       case 21:
2816         i -= 1;
2817         emit_int8(0x66); // size prefix
2818       case 20:
2819       case 19:
2820         i -= 1;
2821         emit_int8(0x66); // size prefix
2822       case 18:
2823       case 17:
2824         i -= 1;
2825         emit_int8(0x66); // size prefix
2826       case 16:
2827       case 15:
2828         i -= 8;
2829         addr_nop_8();
2830         break;
2831       case 14:
2832       case 13:
2833         i -= 7;
2834         addr_nop_7();
2835         break;
2836       case 12:
2837         i -= 6;
2838         emit_int8(0x66); // size prefix
2839         addr_nop_5();
2840         break;
2841       default:
2842         assert(i < 12, " ");
2843     }
2844 
    // Generate the second nop for sizes between 11 and 1
2846     switch (i) {
2847       case 11:
2848         emit_int8(0x66); // size prefix
2849       case 10:
2850         emit_int8(0x66); // size prefix
2851       case 9:
2852         emit_int8(0x66); // size prefix
2853       case 8:
2854         addr_nop_8();
2855         break;
2856       case 7:
2857         addr_nop_7();
2858         break;
2859       case 6:
2860         emit_int8(0x66); // size prefix
2861       case 5:
2862         addr_nop_5();
2863         break;
2864       case 4:
2865         addr_nop_4();
2866         break;
2867       case 3:
2868         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2869         emit_int8(0x66); // size prefix
2870       case 2:
2871         emit_int8(0x66); // size prefix
2872       case 1:
2873         emit_int8((unsigned char)0x90);
2874                          // nop
2875         break;
2876       default:
2877         assert(i == 0, " ");
2878     }
2879     return;
2880   }
2881 
2882   // Using nops with size prefixes "0x66 0x90".
2883   // From AMD Optimization Guide:
2884   //  1: 0x90
2885   //  2: 0x66 0x90
2886   //  3: 0x66 0x66 0x90
2887   //  4: 0x66 0x66 0x66 0x90
2888   //  5: 0x66 0x66 0x90 0x66 0x90
2889   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
2890   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2891   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2892   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2893   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2894   //
  while (i > 12) {
2896     i -= 4;
2897     emit_int8(0x66); // size prefix
2898     emit_int8(0x66);
2899     emit_int8(0x66);
2900     emit_int8((unsigned char)0x90);
2901                      // nop
2902   }
2903   // 1 - 12 nops
  if (i > 8) {
    if (i > 9) {
2906       i -= 1;
2907       emit_int8(0x66);
2908     }
2909     i -= 3;
2910     emit_int8(0x66);
2911     emit_int8(0x66);
2912     emit_int8((unsigned char)0x90);
2913   }
2914   // 1 - 8 nops
  if (i > 4) {
    if (i > 6) {
2917       i -= 1;
2918       emit_int8(0x66);
2919     }
2920     i -= 3;
2921     emit_int8(0x66);
2922     emit_int8(0x66);
2923     emit_int8((unsigned char)0x90);
2924   }
2925   switch (i) {
2926     case 4:
2927       emit_int8(0x66);
2928     case 3:
2929       emit_int8(0x66);
2930     case 2:
2931       emit_int8(0x66);
2932     case 1:
2933       emit_int8((unsigned char)0x90);
2934       break;
2935     default:
2936       assert(i == 0, " ");
2937   }
2938 }
2939 
2940 void Assembler::notl(Register dst) {
2941   int encode = prefix_and_encode(dst->encoding());
2942   emit_int8((unsigned char)0xF7);
2943   emit_int8((unsigned char)(0xD0 | encode));
2944 }
2945 
2946 void Assembler::orl(Address dst, int32_t imm32) {
2947   InstructionMark im(this);
2948   prefix(dst);
2949   emit_arith_operand(0x81, rcx, dst, imm32);
2950 }
2951 
2952 void Assembler::orl(Register dst, int32_t imm32) {
2953   prefix(dst);
2954   emit_arith(0x81, 0xC8, dst, imm32);
2955 }
2956 
2957 void Assembler::orl(Register dst, Address src) {
2958   InstructionMark im(this);
2959   prefix(src, dst);
2960   emit_int8(0x0B);
2961   emit_operand(dst, src);
2962 }
2963 
2964 void Assembler::orl(Register dst, Register src) {
2965   (void) prefix_and_encode(dst->encoding(), src->encoding());
2966   emit_arith(0x0B, 0xC0, dst, src);
2967 }
2968 
2969 void Assembler::orl(Address dst, Register src) {
2970   InstructionMark im(this);
2971   prefix(dst, src);
2972   emit_int8(0x09);
2973   emit_operand(src, dst);
2974 }
2975 
2976 void Assembler::packuswb(XMMRegister dst, Address src) {
2977   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2978   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2979   if (VM_Version::supports_evex()) {
2980     _tuple_type = EVEX_FV;
2981     _input_size_in_bits = EVEX_32bit;
2982   }
2983   emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
2984 }
2985 
2986 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2987   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2988   emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
2989 }
2990 
2991 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
2992   assert(UseAVX > 0, "some form of AVX must be enabled");
2993   emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
2994 }
2995 
2996 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
2997   _instruction_uses_vl = true;
2998   assert(VM_Version::supports_avx2(), "");
2999   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3000                                       VEX_OPCODE_0F_3A, /* rex_w */ true, vector_len);
3001   emit_int8(0x00);
3002   emit_int8(0xC0 | encode);
3003   emit_int8(imm8);
3004 }
3005 
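// PAUSE (F3 90): spin-wait hint for busy-wait loops; decodes as a plain nop
// on processors that don't implement it.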
3006 void Assembler::pause() {
3007   emit_int8((unsigned char)0xF3);
3008   emit_int8((unsigned char)0x90);
3009 }
3010 
3011 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3012   assert(VM_Version::supports_sse4_2(), "");
3013   InstructionMark im(this);
3014   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_3A,
3015               /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3016   emit_int8(0x61);
3017   emit_operand(dst, src);
3018   emit_int8(imm8);
3019 }
3020 
3021 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3022   assert(VM_Version::supports_sse4_2(), "");
3023   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3024                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3025   emit_int8(0x61);
3026   emit_int8((unsigned char)(0xC0 | encode));
3027   emit_int8(imm8);
3028 }
3029 
3030 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3031   assert(VM_Version::supports_sse4_1(), "");
3032   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
3033                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3034   emit_int8(0x16);
3035   emit_int8((unsigned char)(0xC0 | encode));
3036   emit_int8(imm8);
3037 }
3038 
3039 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
3040   assert(VM_Version::supports_sse4_1(), "");
3041   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */  true,
3042                                       VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3043   emit_int8(0x16);
3044   emit_int8((unsigned char)(0xC0 | encode));
3045   emit_int8(imm8);
3046 }
3047 
3048 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
3049   assert(VM_Version::supports_sse2(), "");
3050   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
3051                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3052   emit_int8((unsigned char)0xC5);
3053   emit_int8((unsigned char)(0xC0 | encode));
3054   emit_int8(imm8);
3055 }
3056 
3057 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
3058   assert(VM_Version::supports_sse4_1(), "");
3059   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
3060                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3061   emit_int8(0x22);
3062   emit_int8((unsigned char)(0xC0 | encode));
3063   emit_int8(imm8);
3064 }
3065 
3066 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
3067   assert(VM_Version::supports_sse4_1(), "");
3068   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
3069                                       VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3070   emit_int8(0x22);
3071   emit_int8((unsigned char)(0xC0 | encode));
3072   emit_int8(imm8);
3073 }
3074 
3075 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
3076   assert(VM_Version::supports_sse2(), "");
3077   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
3078                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3079   emit_int8((unsigned char)0xC4);
3080   emit_int8((unsigned char)(0xC0 | encode));
3081   emit_int8(imm8);
3082 }
3083 
3084 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
3085   assert(VM_Version::supports_sse4_1(), "");
3086   if (VM_Version::supports_evex()) {
3087     _tuple_type = EVEX_HVM;
3088   }
3089   InstructionMark im(this);
3090   simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
3091   emit_int8(0x30);
3092   emit_operand(dst, src);
3093 }
3094 
3095 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
3096   assert(VM_Version::supports_sse4_1(), "");
3097   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
3098   emit_int8(0x30);
3099   emit_int8((unsigned char)(0xC0 | encode));
3100 }
3101 
// generic pop (opcode 0x58 | reg encoding)
3103 void Assembler::pop(Register dst) {
3104   int encode = prefix_and_encode(dst->encoding());
3105   emit_int8(0x58 | encode);
3106 }
3107 
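// POPCNT (F3 0F B8): count set bits; availability is gated by
// supports_popcnt() since it is a separate CPUID feature.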
3108 void Assembler::popcntl(Register dst, Address src) {
3109   assert(VM_Version::supports_popcnt(), "must support");
3110   InstructionMark im(this);
3111   emit_int8((unsigned char)0xF3);
3112   prefix(src, dst);
3113   emit_int8(0x0F);
3114   emit_int8((unsigned char)0xB8);
3115   emit_operand(dst, src);
3116 }
3117 
3118 void Assembler::popcntl(Register dst, Register src) {
3119   assert(VM_Version::supports_popcnt(), "must support");
3120   emit_int8((unsigned char)0xF3);
3121   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3122   emit_int8(0x0F);
3123   emit_int8((unsigned char)0xB8);
3124   emit_int8((unsigned char)(0xC0 | encode));
3125 }
3126 
3127 void Assembler::popf() {
3128   emit_int8((unsigned char)0x9D);
3129 }
3130 
3131 #ifndef _LP64 // no 32bit push/pop on amd64
3132 void Assembler::popl(Address dst) {
  // NOTE: this would adjust the stack by 8 bytes on 64-bit, hence the 32-bit-only guard
3134   InstructionMark im(this);
3135   prefix(dst);
3136   emit_int8((unsigned char)0x8F);
3137   emit_operand(rax, dst);
3138 }
3139 #endif
3140 
3141 void Assembler::prefetch_prefix(Address src) {
3142   prefix(src);
3143   emit_int8(0x0F);
3144 }
3145 
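// The prefetch hints (0F 18 /0../3) select the locality level via the ModRM
// reg field: /0 = NTA, /1 = T0, /2 = T1, /3 = T2. The 3DNow!-style
// prefetchr/prefetchw use opcode 0F 0D instead.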
3146 void Assembler::prefetchnta(Address src) {
3147   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3148   InstructionMark im(this);
3149   prefetch_prefix(src);
3150   emit_int8(0x18);
3151   emit_operand(rax, src); // 0, src
3152 }
3153 
3154 void Assembler::prefetchr(Address src) {
3155   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3156   InstructionMark im(this);
3157   prefetch_prefix(src);
3158   emit_int8(0x0D);
3159   emit_operand(rax, src); // 0, src
3160 }
3161 
3162 void Assembler::prefetcht0(Address src) {
3163   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3164   InstructionMark im(this);
3165   prefetch_prefix(src);
3166   emit_int8(0x18);
3167   emit_operand(rcx, src); // 1, src
3168 }
3169 
3170 void Assembler::prefetcht1(Address src) {
3171   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3172   InstructionMark im(this);
3173   prefetch_prefix(src);
3174   emit_int8(0x18);
3175   emit_operand(rdx, src); // 2, src
3176 }
3177 
3178 void Assembler::prefetcht2(Address src) {
3179   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3180   InstructionMark im(this);
3181   prefetch_prefix(src);
3182   emit_int8(0x18);
3183   emit_operand(rbx, src); // 3, src
3184 }
3185 
3186 void Assembler::prefetchw(Address src) {
3187   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3188   InstructionMark im(this);
3189   prefetch_prefix(src);
3190   emit_int8(0x0D);
3191   emit_operand(rcx, src); // 1, src
3192 }
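
// For reference, the prefetch variants above differ only in the ModRM reg
// field: the register passed to emit_operand() selects the /digit opcode
// extension (illustrative summary):
//   0F 18 /0  prefetchnta        0F 18 /1  prefetcht0
//   0F 18 /2  prefetcht1         0F 18 /3  prefetcht2
//   0F 0D /0  prefetchr          0F 0D /1  prefetchw   (3DNow! hints)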
3193 
3194 void Assembler::prefix(Prefix p) {
3195   emit_int8(p);
3196 }
3197 
3198 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
3199   assert(VM_Version::supports_ssse3(), "");
3200   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
3201                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3202   emit_int8(0x00);
3203   emit_int8((unsigned char)(0xC0 | encode));
3204 }
3205 
3206 void Assembler::pshufb(XMMRegister dst, Address src) {
3207   assert(VM_Version::supports_ssse3(), "");
3208   if (VM_Version::supports_evex()) {
3209     _tuple_type = EVEX_FVM;
3210   }
3211   InstructionMark im(this);
3212   simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
3213               VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3214   emit_int8(0x00);
3215   emit_operand(dst, src);
3216 }
3217 
3218 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
3219   _instruction_uses_vl = true;
3220   assert(isByte(mode), "invalid value");
3221   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3222   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
3223   emit_int8(mode & 0xFF);
3224 }
3225 
3226 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
3227   _instruction_uses_vl = true;
3228   assert(isByte(mode), "invalid value");
3229   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3230   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3231   if (VM_Version::supports_evex()) {
3232     _tuple_type = EVEX_FV;
3233     _input_size_in_bits = EVEX_32bit;
3234   }
3235   InstructionMark im(this);
3236   simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false);
3237   emit_int8(0x70);
3238   emit_operand(dst, src);
3239   emit_int8(mode & 0xFF);
3240 }
3241 
3242 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
3243   assert(isByte(mode), "invalid value");
3244   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3245   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
3246   emit_int8(mode & 0xFF);
3247 }
3248 
3249 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
3250   assert(isByte(mode), "invalid value");
3251   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3252   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3253   if (VM_Version::supports_evex()) {
3254     _tuple_type = EVEX_FVM;
3255   }
3256   InstructionMark im(this);
3257   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false,
3258               VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3259   emit_int8(0x70);
3260   emit_operand(dst, src);
3261   emit_int8(mode & 0xFF);
3262 }
3263 
3264 void Assembler::psrldq(XMMRegister dst, int shift) {
3265   // Shift right 128 bit value in dst XMMRegister by shift number of bytes.
3266   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3267   // XMM3 is for /3 encoding: 66 0F 73 /3 ib
3268   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
3269                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3270   emit_int8(0x73);
3271   emit_int8((unsigned char)(0xC0 | encode));
3272   emit_int8(shift);
3273 }
3274 
3275 void Assembler::pslldq(XMMRegister dst, int shift) {
3276   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
3277   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3278   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
3279   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
3280                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3281   emit_int8(0x73);
3282   emit_int8((unsigned char)(0xC0 | encode));
3283   emit_int8(shift);
3284 }
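
// In the two byte-shift instructions above, the first XMMRegister argument
// is not a real operand: it only contributes the ModRM reg field that acts
// as the opcode extension (/3 for psrldq, /7 for pslldq). A minimal
// encoding sketch, assuming xmm1 as the destination:
//   psrldq(xmm1, 4);   // 66 0F 73 D9 04   (/3: shift right by 4 bytes)
//   pslldq(xmm1, 4);   // 66 0F 73 F9 04   (/7: shift left by 4 bytes)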
3285 
3286 void Assembler::ptest(XMMRegister dst, Address src) {
3287   assert(VM_Version::supports_sse4_1(), "");
3288   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3289   InstructionMark im(this);
3290   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3291               VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3292   emit_int8(0x17);
3293   emit_operand(dst, src);
3294 }
3295 
3296 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
3297   assert(VM_Version::supports_sse4_1(), "");
3298   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3299                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3300   emit_int8(0x17);
3301   emit_int8((unsigned char)(0xC0 | encode));
3302 }
3303 
3304 void Assembler::vptest(XMMRegister dst, Address src) {
3305   assert(VM_Version::supports_avx(), "");
3306   InstructionMark im(this);
3307   int vector_len = AVX_256bit;
3308   assert(dst != xnoreg, "sanity");
3309   int dst_enc = dst->encoding();
3310   // swap src<->dst for encoding
3311   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* rex_w */ false,
3312              vector_len, /* legacy_mode  */ true, /* no_mask_reg */ false);
3313   emit_int8(0x17);
3314   emit_operand(dst, src);
3315 }
3316 
3317 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
3318   assert(VM_Version::supports_avx(), "");
3319   int vector_len = AVX_256bit;
3320   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
3321   emit_int8(0x17);
3322   emit_int8((unsigned char)(0xC0 | encode));
3323 }
3324 
3325 void Assembler::punpcklbw(XMMRegister dst, Address src) {
3326   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3327   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3328   if (VM_Version::supports_evex()) {
3329     _tuple_type = EVEX_FVM;
3330   }
3331   emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
3332 }
3333 
3334 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
3335   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3336   emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
3337 }
3338 
3339 void Assembler::punpckldq(XMMRegister dst, Address src) {
3340   _instruction_uses_vl = true;
3341   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3342   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3343   if (VM_Version::supports_evex()) {
3344     _tuple_type = EVEX_FV;
3345     _input_size_in_bits = EVEX_32bit;
3346   }
3347   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3348 }
3349 
3350 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
3351   _instruction_uses_vl = true;
3352   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3353   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3354 }
3355 
3356 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
3357   _instruction_uses_vl = true;
3358   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3359   if (VM_Version::supports_evex()) {
3360     emit_simd_arith_q(0x6C, dst, src, VEX_SIMD_66);
3361   } else {
3362     emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
3363   }
3364 }
3365 
3366 void Assembler::push(int32_t imm32) {
3367   // in 64-bit mode we push 64 bits onto the stack but only
3368   // take a 32-bit immediate
3369   emit_int8(0x68);
3370   emit_int32(imm32);
3371 }
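
// Example (illustrative): push(0x12345678) emits 68 78 56 34 12; the CPU
// sign-extends the immediate, so rsp drops by 8 on 64-bit and by 4 on
// 32-bit.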
3372 
3373 void Assembler::push(Register src) {
3374   int encode = prefix_and_encode(src->encoding());
3375 
3376   emit_int8(0x50 | encode);
3377 }
3378 
3379 void Assembler::pushf() {
3380   emit_int8((unsigned char)0x9C);
3381 }
3382 
3383 #ifndef _LP64 // no 32bit push/pop on amd64
3384 void Assembler::pushl(Address src) {
3385   // Note: 32-bit only (guarded by #ifndef _LP64 above); push from memory adjusts the stack by 4 bytes here
3386   InstructionMark im(this);
3387   prefix(src);
3388   emit_int8((unsigned char)0xFF);
3389   emit_operand(rsi, src);
3390 }
3391 #endif
3392 
3393 void Assembler::rcll(Register dst, int imm8) {
3394   assert(isShiftCount(imm8), "illegal shift count");
3395   int encode = prefix_and_encode(dst->encoding());
3396   if (imm8 == 1) {
3397     emit_int8((unsigned char)0xD1);
3398     emit_int8((unsigned char)(0xD0 | encode));
3399   } else {
3400     emit_int8((unsigned char)0xC1);
3401     emit_int8((unsigned char)(0xD0 | encode));
3402     emit_int8(imm8);
3403   }
3404 }
3405 
3406 void Assembler::rdtsc() {
3407   emit_int8((unsigned char)0x0F);
3408   emit_int8((unsigned char)0x31);
3409 }
3410 
3411 // copies rcx pointer-sized words from [esi] to [edi]
3412 // generic
3413 void Assembler::rep_mov() {
3414   emit_int8((unsigned char)0xF3);
3415   // MOVSQ
3416   LP64_ONLY(prefix(REX_W));
3417   emit_int8((unsigned char)0xA5);
3418 }
3419 
3420 // stores the low byte of rax into rcx consecutive bytes starting at [edi]
3421 void Assembler::rep_stosb() {
3422   emit_int8((unsigned char)0xF3); // REP
3423   LP64_ONLY(prefix(REX_W));
3424   emit_int8((unsigned char)0xAA); // STOSB
3425 }
3426 
3427 // stores rax into rcx pointer-sized words starting at [edi]
3428 // generic
3429 void Assembler::rep_stos() {
3430   emit_int8((unsigned char)0xF3); // REP
3431   LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
3432   emit_int8((unsigned char)0xAB);
3433 }
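
// Typical usage sketch for the rep_* helpers above (illustrative only;
// the fixed-register setup is the caller's job, shown here with
// MacroAssembler-style moves):
//   movptr(rdi, dst);     // destination
//   xorptr(rax, rax);     // value to store
//   movptr(rcx, count);   // number of pointer-sized words
//   rep_stos();           // F3 (REX.W) AB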
3434 
3435 // scans rcx pointer-sized words starting at [edi] for an occurrence of rax
3436 // generic
3437 void Assembler::repne_scan() {
3438   emit_int8((unsigned char)0xF2);
3439   // SCASQ
3440   LP64_ONLY(prefix(REX_W));
3441   emit_int8((unsigned char)0xAF);
3442 }
3443 
3444 #ifdef _LP64
3445 // scans rcx 4-byte words starting at [edi] for an occurrence of rax
3446 // generic
3447 void Assembler::repne_scanl() {
3448   emit_int8((unsigned char)0xF2);
3449   // SCASL
3450   emit_int8((unsigned char)0xAF);
3451 }
3452 #endif
3453 
3454 void Assembler::ret(int imm16) {
3455   if (imm16 == 0) {
3456     emit_int8((unsigned char)0xC3);
3457   } else {
3458     emit_int8((unsigned char)0xC2);
3459     emit_int16(imm16);
3460   }
3461 }
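
// Encoding note: ret(0) uses the one-byte C3 form; a non-zero imm16 uses
// C2 iw, which additionally pops imm16 bytes of arguments, e.g.
//   ret(8);   // C2 08 00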
3462 
3463 void Assembler::sahf() {
3464 #ifdef _LP64
3465   // Not supported in 64bit mode
3466   ShouldNotReachHere();
3467 #endif
3468   emit_int8((unsigned char)0x9E);
3469 }
3470 
3471 void Assembler::sarl(Register dst, int imm8) {
3472   int encode = prefix_and_encode(dst->encoding());
3473   assert(isShiftCount(imm8), "illegal shift count");
3474   if (imm8 == 1) {
3475     emit_int8((unsigned char)0xD1);
3476     emit_int8((unsigned char)(0xF8 | encode));
3477   } else {
3478     emit_int8((unsigned char)0xC1);
3479     emit_int8((unsigned char)(0xF8 | encode));
3480     emit_int8(imm8);
3481   }
3482 }
3483 
3484 void Assembler::sarl(Register dst) {
3485   int encode = prefix_and_encode(dst->encoding());
3486   emit_int8((unsigned char)0xD3);
3487   emit_int8((unsigned char)(0xF8 | encode));
3488 }
3489 
3490 void Assembler::sbbl(Address dst, int32_t imm32) {
3491   InstructionMark im(this);
3492   prefix(dst);
3493   emit_arith_operand(0x81, rbx, dst, imm32);
3494 }
3495 
3496 void Assembler::sbbl(Register dst, int32_t imm32) {
3497   prefix(dst);
3498   emit_arith(0x81, 0xD8, dst, imm32);
3499 }
3500 
3501 
3502 void Assembler::sbbl(Register dst, Address src) {
3503   InstructionMark im(this);
3504   prefix(src, dst);
3505   emit_int8(0x1B);
3506   emit_operand(dst, src);
3507 }
3508 
3509 void Assembler::sbbl(Register dst, Register src) {
3510   (void) prefix_and_encode(dst->encoding(), src->encoding());
3511   emit_arith(0x1B, 0xC0, dst, src);
3512 }
3513 
3514 void Assembler::setb(Condition cc, Register dst) {
3515   assert(0 <= cc && cc < 16, "illegal cc");
3516   int encode = prefix_and_encode(dst->encoding(), true);
3517   emit_int8(0x0F);
3518   emit_int8((unsigned char)(0x90 | cc));
3519   emit_int8((unsigned char)(0xC0 | encode));
3520 }
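
// The 'true' passed to prefix_and_encode() marks this as a byte
// instruction: in 64-bit mode it forces an (otherwise empty) REX prefix
// for encodings 4..7 so that sil/dil/spl/bpl are addressed rather than
// the legacy ah..bh registers, e.g. (illustrative):
//   setb(Assembler::zero, rsi);   // 40 0F 94 C6   (sete sil)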
3521 
3522 void Assembler::shll(Register dst, int imm8) {
3523   assert(isShiftCount(imm8), "illegal shift count");
3524   int encode = prefix_and_encode(dst->encoding());
3525   if (imm8 == 1 ) {
3526     emit_int8((unsigned char)0xD1);
3527     emit_int8((unsigned char)(0xE0 | encode));
3528   } else {
3529     emit_int8((unsigned char)0xC1);
3530     emit_int8((unsigned char)(0xE0 | encode));
3531     emit_int8(imm8);
3532   }
3533 }
3534 
3535 void Assembler::shll(Register dst) {
3536   int encode = prefix_and_encode(dst->encoding());
3537   emit_int8((unsigned char)0xD3);
3538   emit_int8((unsigned char)(0xE0 | encode));
3539 }
3540 
3541 void Assembler::shrl(Register dst, int imm8) {
3542   assert(isShiftCount(imm8), "illegal shift count");
3543   int encode = prefix_and_encode(dst->encoding());
3544   emit_int8((unsigned char)0xC1);
3545   emit_int8((unsigned char)(0xE8 | encode));
3546   emit_int8(imm8);
3547 }
3548 
3549 void Assembler::shrl(Register dst) {
3550   int encode = prefix_and_encode(dst->encoding());
3551   emit_int8((unsigned char)0xD3);
3552   emit_int8((unsigned char)(0xE8 | encode));
3553 }
3554 
3555 // copies a single dword from [esi] to [edi]
3556 void Assembler::smovl() {
3557   emit_int8((unsigned char)0xA5);
3558 }
3559 
3560 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
3561   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3562   if (VM_Version::supports_evex()) {
3563     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3564   } else {
3565     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3566   }
3567 }
3568 
3569 void Assembler::sqrtsd(XMMRegister dst, Address src) {
3570   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3571   if (VM_Version::supports_evex()) {
3572     _tuple_type = EVEX_T1S;
3573     _input_size_in_bits = EVEX_64bit;
3574     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3575   } else {
3576     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3577   }
3578 }
3579 
3580 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
3581   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3582   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
3583 }
3584 
3585 void Assembler::std() {
3586   emit_int8((unsigned char)0xFD);
3587 }
3588 
3589 void Assembler::sqrtss(XMMRegister dst, Address src) {
3590   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3591   if (VM_Version::supports_evex()) {
3592     _tuple_type = EVEX_T1S;
3593     _input_size_in_bits = EVEX_32bit;
3594   }
3595   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
3596 }
3597 
3598 void Assembler::stmxcsr(Address dst) {
3599   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3600   InstructionMark im(this);
3601   prefix(dst);
3602   emit_int8(0x0F);
3603   emit_int8((unsigned char)0xAE);
3604   emit_operand(as_Register(3), dst);
3605 }
3606 
3607 void Assembler::subl(Address dst, int32_t imm32) {
3608   InstructionMark im(this);
3609   prefix(dst);
3610   emit_arith_operand(0x81, rbp, dst, imm32);
3611 }
3612 
3613 void Assembler::subl(Address dst, Register src) {
3614   InstructionMark im(this);
3615   prefix(dst, src);
3616   emit_int8(0x29);
3617   emit_operand(src, dst);
3618 }
3619 
3620 void Assembler::subl(Register dst, int32_t imm32) {
3621   prefix(dst);
3622   emit_arith(0x81, 0xE8, dst, imm32);
3623 }
3624 
3625 // Force generation of a 4-byte immediate value even if it fits into 8 bits
3626 void Assembler::subl_imm32(Register dst, int32_t imm32) {
3627   prefix(dst);
3628   emit_arith_imm32(0x81, 0xE8, dst, imm32);
3629 }
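
// The fixed-size immediate matters for code that gets patched after it is
// emitted: a later pass can rewrite the 32-bit field in place, which would
// not be possible if the assembler had picked the sign-extended 8-bit
// form (83 /5 ib) for a small value.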
3630 
3631 void Assembler::subl(Register dst, Address src) {
3632   InstructionMark im(this);
3633   prefix(src, dst);
3634   emit_int8(0x2B);
3635   emit_operand(dst, src);
3636 }
3637 
3638 void Assembler::subl(Register dst, Register src) {
3639   (void) prefix_and_encode(dst->encoding(), src->encoding());
3640   emit_arith(0x2B, 0xC0, dst, src);
3641 }
3642 
3643 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
3644   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3645   if (VM_Version::supports_evex()) {
3646     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
3647   } else {
3648     emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
3649   }
3650 }
3651 
3652 void Assembler::subsd(XMMRegister dst, Address src) {
3653   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3654   if (VM_Version::supports_evex()) {
3655     _tuple_type = EVEX_T1S;
3656     _input_size_in_bits = EVEX_64bit;
3657     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
3658   } else {
3659     emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
3660   }
3661 }
3664 
3665 void Assembler::subss(XMMRegister dst, XMMRegister src) {
3666   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3667   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3668 }
3669 
3670 void Assembler::subss(XMMRegister dst, Address src) {
3671   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3672   if (VM_Version::supports_evex()) {
3673     _tuple_type = EVEX_T1S;
3674     _input_size_in_bits = EVEX_32bit;
3675   }
3676   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3677 }
3678 
3679 void Assembler::testb(Register dst, int imm8) {
3680   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
3681   (void) prefix_and_encode(dst->encoding(), true);
3682   emit_arith_b(0xF6, 0xC0, dst, imm8);
3683 }
3684 
3685 void Assembler::testl(Register dst, int32_t imm32) {
3686   // not using emit_arith because test
3687   // doesn't support sign-extension of
3688   // 8bit operands
3689   int encode = dst->encoding();
3690   if (encode == 0) {
3691     emit_int8((unsigned char)0xA9);
3692   } else {
3693     encode = prefix_and_encode(encode);
3694     emit_int8((unsigned char)0xF7);
3695     emit_int8((unsigned char)(0xC0 | encode));
3696   }
3697   emit_int32(imm32);
3698 }
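
// The encode == 0 path uses the short accumulator form A9 id and saves
// the ModRM byte, e.g. (illustrative):
//   testl(rax, 1);   // A9 01 00 00 00
//   testl(rbx, 1);   // F7 C3 01 00 00 00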
3699 
3700 void Assembler::testl(Register dst, Register src) {
3701   (void) prefix_and_encode(dst->encoding(), src->encoding());
3702   emit_arith(0x85, 0xC0, dst, src);
3703 }
3704 
3705 void Assembler::testl(Register dst, Address  src) {
3706   InstructionMark im(this);
3707   prefix(src, dst);
3708   emit_int8((unsigned char)0x85);
3709   emit_operand(dst, src);
3710 }
3711 
3712 void Assembler::tzcntl(Register dst, Register src) {
3713   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3714   emit_int8((unsigned char)0xF3);
3715   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3716   emit_int8(0x0F);
3717   emit_int8((unsigned char)0xBC);
3718   emit_int8((unsigned char)(0xC0 | encode));
3719 }
3720 
3721 void Assembler::tzcntq(Register dst, Register src) {
3722   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3723   emit_int8((unsigned char)0xF3);
3724   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3725   emit_int8(0x0F);
3726   emit_int8((unsigned char)0xBC);
3727   emit_int8((unsigned char)(0xC0 | encode));
3728 }
3729 
3730 void Assembler::ucomisd(XMMRegister dst, Address src) {
3731   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3732   if (VM_Version::supports_evex()) {
3733     _tuple_type = EVEX_T1S;
3734     _input_size_in_bits = EVEX_64bit;
3735     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
3736   } else {
3737     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3738   }
3739 }
3740 
3741 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
3742   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3743   if (VM_Version::supports_evex()) {
3744     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
3745   } else {
3746     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3747   }
3748 }
3749 
3750 void Assembler::ucomiss(XMMRegister dst, Address src) {
3751   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3752   if (VM_Version::supports_evex()) {
3753     _tuple_type = EVEX_T1S;
3754     _input_size_in_bits = EVEX_32bit;
3755   }
3756   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
3757 }
3758 
3759 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
3760   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3761   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
3762 }
3763 
3764 void Assembler::xabort(int8_t imm8) {
3765   emit_int8((unsigned char)0xC6);
3766   emit_int8((unsigned char)0xF8);
3767   emit_int8((unsigned char)(imm8 & 0xFF));
3768 }
3769 
3770 void Assembler::xaddl(Address dst, Register src) {
3771   InstructionMark im(this);
3772   prefix(dst, src);
3773   emit_int8(0x0F);
3774   emit_int8((unsigned char)0xC1);
3775   emit_operand(src, dst);
3776 }
3777 
3778 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
3779   InstructionMark im(this);
3780   relocate(rtype);
3781   if (abort.is_bound()) {
3782     address entry = target(abort);
3783     assert(entry != NULL, "abort entry NULL");
3784     intptr_t offset = entry - pc();
3785     emit_int8((unsigned char)0xC7);
3786     emit_int8((unsigned char)0xF8);
3787     emit_int32(offset - 6); // 2 opcode bytes + 4-byte relative offset
3788   } else {
3789     abort.add_patch_at(code(), locator());
3790     emit_int8((unsigned char)0xC7);
3791     emit_int8((unsigned char)0xF8);
3792     emit_int32(0);
3793   }
3794 }
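
// Sanity check of the offset math above: XBEGIN is C7 F8 cd with a rel32
// measured from the end of the 6-byte instruction, so the hardware
// computes target = (pc_before + 6) + (offset - 6) = pc_before + offset,
// which is exactly the bound 'entry' address.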
3795 
3796 void Assembler::xchgl(Register dst, Address src) { // xchg
3797   InstructionMark im(this);
3798   prefix(src, dst);
3799   emit_int8((unsigned char)0x87);
3800   emit_operand(dst, src);
3801 }
3802 
3803 void Assembler::xchgl(Register dst, Register src) {
3804   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3805   emit_int8((unsigned char)0x87);
3806   emit_int8((unsigned char)(0xC0 | encode));
3807 }
3808 
3809 void Assembler::xend() {
3810   emit_int8((unsigned char)0x0F);
3811   emit_int8((unsigned char)0x01);
3812   emit_int8((unsigned char)0xD5);
3813 }
3814 
3815 void Assembler::xgetbv() {
3816   emit_int8(0x0F);
3817   emit_int8(0x01);
3818   emit_int8((unsigned char)0xD0);
3819 }
3820 
3821 void Assembler::xorl(Register dst, int32_t imm32) {
3822   prefix(dst);
3823   emit_arith(0x81, 0xF0, dst, imm32);
3824 }
3825 
3826 void Assembler::xorl(Register dst, Address src) {
3827   InstructionMark im(this);
3828   prefix(src, dst);
3829   emit_int8(0x33);
3830   emit_operand(dst, src);
3831 }
3832 
3833 void Assembler::xorl(Register dst, Register src) {
3834   (void) prefix_and_encode(dst->encoding(), src->encoding());
3835   emit_arith(0x33, 0xC0, dst, src);
3836 }
3837 
3838 
3839 // AVX 3-operand scalar floating-point arithmetic instructions
3840 
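// These use the non-destructive three-operand form: dst = nds OP src for
// the low element (the upper bits of dst are copied from nds), so neither
// input is clobbered. A minimal usage sketch:
//   vaddsd(xmm0, xmm1, xmm2);   // xmm0[63:0] = xmm1[63:0] + xmm2[63:0]
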
3841 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
3842   assert(VM_Version::supports_avx(), "");
3843   if (VM_Version::supports_evex()) {
3844     _tuple_type = EVEX_T1S;
3845     _input_size_in_bits = EVEX_64bit;
3846     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3847   } else {
3848     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3849   }
3850 }
3851 
3852 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3853   assert(VM_Version::supports_avx(), "");
3854   if (VM_Version::supports_evex()) {
3855     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3856   } else {
3857     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3858   }
3859 }
3860 
3861 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
3862   assert(VM_Version::supports_avx(), "");
3863   if (VM_Version::supports_evex()) {
3864     _tuple_type = EVEX_T1S;
3865     _input_size_in_bits = EVEX_32bit;
3866   }
3867   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3868 }
3869 
3870 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3871   assert(VM_Version::supports_avx(), "");
3872   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3873 }
3874 
3875 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
3876   assert(VM_Version::supports_avx(), "");
3877   if (VM_Version::supports_evex()) {
3878     _tuple_type = EVEX_T1S;
3879     _input_size_in_bits = EVEX_64bit;
3880     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3881   } else {
3882     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3883   }
3884 }
3885 
3886 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3887   assert(VM_Version::supports_avx(), "");
3888   if (VM_Version::supports_evex()) {
3889     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3890   } else {
3891     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3892   }
3893 }
3894 
3895 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
3896   assert(VM_Version::supports_avx(), "");
3897   if (VM_Version::supports_evex()) {
3898     _tuple_type = EVEX_T1S;
3899     _input_size_in_bits = EVEX_32bit;
3900   }
3901   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3902 }
3903 
3904 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3905   assert(VM_Version::supports_avx(), "");
3906   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3907 }
3908 
3909 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
3910   assert(VM_Version::supports_avx(), "");
3911   if (VM_Version::supports_evex()) {
3912     _tuple_type = EVEX_T1S;
3913     _input_size_in_bits = EVEX_64bit;
3914     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3915   } else {
3916     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3917   }
3918 }
3919 
3920 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3921   assert(VM_Version::supports_avx(), "");
3922   if (VM_Version::supports_evex()) {
3923     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3924   } else {
3925     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3926   }
3927 }
3928 
3929 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
3930   assert(VM_Version::supports_avx(), "");
3931   if (VM_Version::supports_evex()) {
3932     _tuple_type = EVEX_T1S;
3933     _input_size_in_bits = EVEX_32bit;
3934   }
3935   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3936 }
3937 
3938 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3939   assert(VM_Version::supports_avx(), "");
3940   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3941 }
3942 
3943 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
3944   assert(VM_Version::supports_avx(), "");
3945   if (VM_Version::supports_evex()) {
3946     _tuple_type = EVEX_T1S;
3947     _input_size_in_bits = EVEX_64bit;
3948     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3949   } else {
3950     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3951   }
3952 }
3953 
3954 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3955   assert(VM_Version::supports_avx(), "");
3956   if (VM_Version::supports_evex()) {
3957     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3958   } else {
3959     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3960   }
3961 }
3962 
3963 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
3964   assert(VM_Version::supports_avx(), "");
3965   if (VM_Version::supports_evex()) {
3966     _tuple_type = EVEX_T1S;
3967     _input_size_in_bits = EVEX_32bit;
3968   }
3969   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3970 }
3971 
3972 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3973   assert(VM_Version::supports_avx(), "");
3974   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3975 }
3976 
3977 //====================VECTOR ARITHMETIC=====================================
3978 
3979 // Floating-point vector arithmetic
3980 
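// The vector_len argument on the vex-encoded forms below selects the
// operating width: AVX_128bit (xmm), AVX_256bit (ymm) or, with EVEX,
// AVX_512bit (zmm). For example (illustrative):
//   vaddpd(xmm0, xmm1, xmm2, Assembler::AVX_256bit);   // ymm0 = ymm1 + ymm2
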
3981 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
3982   _instruction_uses_vl = true;
3983   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3984   if (VM_Version::supports_evex()) {
3985     emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66);
3986   } else {
3987     emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
3988   }
3989 }
3990 
3991 void Assembler::addps(XMMRegister dst, XMMRegister src) {
3992   _instruction_uses_vl = true;
3993   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3994   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
3995 }
3996 
3997 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3998   _instruction_uses_vl = true;
3999   assert(VM_Version::supports_avx(), "");
4000   if (VM_Version::supports_evex()) {
4001     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4002   } else {
4003     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4004   }
4005 }
4006 
4007 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4008   _instruction_uses_vl = true;
4009   assert(VM_Version::supports_avx(), "");
4010   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
4011 }
4012 
4013 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4014   _instruction_uses_vl = true;
4015   assert(VM_Version::supports_avx(), "");
4016   if (VM_Version::supports_evex()) {
4017     _tuple_type = EVEX_FV;
4018     _input_size_in_bits = EVEX_64bit;
4019     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4020   } else {
4021     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4022   }
4023 }
4024 
4025 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4026   _instruction_uses_vl = true;
4027   assert(VM_Version::supports_avx(), "");
4028   if (VM_Version::supports_evex()) {
4029     _tuple_type = EVEX_FV;
4030     _input_size_in_bits = EVEX_32bit;
4031   }
4032   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
4033 }
4034 
4035 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
4036   _instruction_uses_vl = true;
4037   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4038   if (VM_Version::supports_evex()) {
4039     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66);
4040   } else {
4041     emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
4042   }
4043 }
4044 
4045 void Assembler::subps(XMMRegister dst, XMMRegister src) {
4046   _instruction_uses_vl = true;
4047   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4048   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
4049 }
4050 
4051 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4052   _instruction_uses_vl = true;
4053   assert(VM_Version::supports_avx(), "");
4054   if (VM_Version::supports_evex()) {
4055     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4056   } else {
4057     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4058   }
4059 }
4060 
4061 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4062   _instruction_uses_vl = true;
4063   assert(VM_Version::supports_avx(), "");
4064   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
4065 }
4066 
4067 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4068   _instruction_uses_vl = true;
4069   assert(VM_Version::supports_avx(), "");
4070   if (VM_Version::supports_evex()) {
4071     _tuple_type = EVEX_FV;
4072     _input_size_in_bits = EVEX_64bit;
4073     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4074   } else {
4075     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4076   }
4077 }
4078 
4079 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4080   _instruction_uses_vl = true;
4081   assert(VM_Version::supports_avx(), "");
4082   if (VM_Version::supports_evex()) {
4083     _tuple_type = EVEX_FV;
4084     _input_size_in_bits = EVEX_32bit;
4085   }
4086   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
4087 }
4088 
4089 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
4090   _instruction_uses_vl = true;
4091   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4092   if (VM_Version::supports_evex()) {
4093     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
4094   } else {
4095     emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
4096   }
4097 }
4098 
4099 void Assembler::mulpd(XMMRegister dst, Address src) {
4100   _instruction_uses_vl = true;
4101   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4102   if (VM_Version::supports_evex()) {
4103     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
4104   } else {
4105     emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
4106   }
4107 }
4108 
4109 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
4110   _instruction_uses_vl = true;
4111   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4112   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
4113 }
4114 
4115 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4116   _instruction_uses_vl = true;
4117   assert(VM_Version::supports_avx(), "");
4118   if (VM_Version::supports_evex()) {
4119     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4120   } else {
4121     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4122   }
4123 }
4124 
4125 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4126   _instruction_uses_vl = true;
4127   assert(VM_Version::supports_avx(), "");
4128   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
4129 }
4130 
4131 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4132   _instruction_uses_vl = true;
4133   assert(VM_Version::supports_avx(), "");
4134   if (VM_Version::supports_evex()) {
4135     _tuple_type = EVEX_FV;
4136     _input_size_in_bits = EVEX_64bit;
4137     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4138   } else {
4139     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4140   }
4141 }
4142 
4143 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4144   _instruction_uses_vl = true;
4145   assert(VM_Version::supports_avx(), "");
4146   if (VM_Version::supports_evex()) {
4147     _tuple_type = EVEX_FV;
4148     _input_size_in_bits = EVEX_32bit;
4149   }
4150   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
4151 }
4152 
4153 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
4154   _instruction_uses_vl = true;
4155   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4156   if (VM_Version::supports_evex()) {
4157     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66);
4158   } else {
4159     emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
4160   }
4161 }
4162 
4163 void Assembler::divps(XMMRegister dst, XMMRegister src) {
4164   _instruction_uses_vl = true;
4165   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4166   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
4167 }
4168 
4169 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4170   _instruction_uses_vl = true;
4171   assert(VM_Version::supports_avx(), "");
4172   if (VM_Version::supports_evex()) {
4173     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4174   } else {
4175     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4176   }
4177 }
4178 
4179 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4180   _instruction_uses_vl = true;
4181   assert(VM_Version::supports_avx(), "");
4182   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
4183 }
4184 
4185 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4186   _instruction_uses_vl = true;
4187   assert(VM_Version::supports_avx(), "");
4188   if (VM_Version::supports_evex()) {
4189     _tuple_type = EVEX_FV;
4190     _input_size_in_bits = EVEX_64bit;
4191     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4192   } else {
4193     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4194   }
4195 }
4196 
4197 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4198   _instruction_uses_vl = true;
4199   assert(VM_Version::supports_avx(), "");
4200   if (VM_Version::supports_evex()) {
4201     _tuple_type = EVEX_FV;
4202     _input_size_in_bits = EVEX_32bit;
4203   }
4204   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
4205 }
4206 
4207 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
4208   _instruction_uses_vl = true;
4209   assert(VM_Version::supports_avx(), "");
4210   if (VM_Version::supports_evex()) {
4211     emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4212   } else {
4213     emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4214   }
4215 }
4216 
4217 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
4218   _instruction_uses_vl = true;
4219   assert(VM_Version::supports_avx(), "");
4220   if (VM_Version::supports_evex()) {
4221     _tuple_type = EVEX_FV;
4222     _input_size_in_bits = EVEX_64bit;
4223     emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4224   } else {
4225     emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4226   }
4227 }
4228 
4229 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
4230   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4231   if (VM_Version::supports_avx512dq()) {
4232     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
4233   } else {
4234     emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4235   }
4236 }
4237 
4238 void Assembler::andps(XMMRegister dst, XMMRegister src) {
4239   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4240   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4241 }
4242 
4243 void Assembler::andps(XMMRegister dst, Address src) {
4244   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4245   if (VM_Version::supports_evex()) {
4246     _tuple_type = EVEX_FV;
4247     _input_size_in_bits = EVEX_32bit;
4248   }
4249   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4250 }
4251 
4252 void Assembler::andpd(XMMRegister dst, Address src) {
4253   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4254   if (VM_Version::supports_avx512dq()) {
4255     _tuple_type = EVEX_FV;
4256     _input_size_in_bits = EVEX_64bit;
4257     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
4258   } else {
4259     emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4260   }
4261 }
4262 
4263 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4264   assert(VM_Version::supports_avx(), "");
4265   if (VM_Version::supports_avx512dq()) {
4266     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
4267   } else {
4268     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4269   }
4270 }
4271 
4272 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4273   assert(VM_Version::supports_avx(), "");
4274   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false,  /* legacy_mode */ _legacy_mode_dq);
4275 }
4276 
4277 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4278   assert(VM_Version::supports_avx(), "");
4279   if (VM_Version::supports_avx512dq()) {
4280     _tuple_type = EVEX_FV;
4281     _input_size_in_bits = EVEX_64bit;
4282     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
4283   } else {
4284     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4285   }
4286 }
4287 
4288 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4289   assert(VM_Version::supports_avx(), "");
4290   if (VM_Version::supports_evex()) {
4291     _tuple_type = EVEX_FV;
4292     _input_size_in_bits = EVEX_32bit;
4293   }
4294   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4295 }
4296 
4297 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
4298   _instruction_uses_vl = true;
4299   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4300   if (VM_Version::supports_evex()) {
4301     emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66);
4302   } else {
4303     emit_simd_arith(0x15, dst, src, VEX_SIMD_66);
4304   }
4305 }
4306 
4307 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
4308   _instruction_uses_vl = true;
4309   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4310   if (VM_Version::supports_evex()) {
4311     emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66);
4312   } else {
4313     emit_simd_arith(0x14, dst, src, VEX_SIMD_66);
4314   }
4315 }
4316 
4317 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
4318   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4319   if (VM_Version::supports_avx512dq()) {
4320     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
4321   } else {
4322     emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4323   }
4324 }
4325 
4326 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
4327   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4328   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4329 }
4330 
4331 void Assembler::xorpd(XMMRegister dst, Address src) {
4332   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4333   if (VM_Version::supports_avx512dq()) {
4334     _tuple_type = EVEX_FV;
4335     _input_size_in_bits = EVEX_64bit;
4336     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
4337   } else {
4338     emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4339   }
4340 }
4341 
4342 void Assembler::xorps(XMMRegister dst, Address src) {
4343   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4344   if (VM_Version::supports_evex()) {
4345     _tuple_type = EVEX_FV;
4346     _input_size_in_bits = EVEX_32bit;
4347   }
4348   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4349 }
4350 
4351 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4352   assert(VM_Version::supports_avx(), "");
4353   if (VM_Version::supports_avx512dq()) {
4354     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
4355   } else {
4356     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4357   }
4358 }
4359 
4360 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4361   assert(VM_Version::supports_avx(), "");
4362   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4363 }
4364 
4365 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4366   assert(VM_Version::supports_avx(), "");
4367   if (VM_Version::supports_avx512dq()) {
4368     _tuple_type = EVEX_FV;
4369     _input_size_in_bits = EVEX_64bit;
4370     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
4371   } else {
4372     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4373   }
4374 }
4375 
4376 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4377   assert(VM_Version::supports_avx(), "");
4378   if (VM_Version::supports_evex()) {
4379     _tuple_type = EVEX_FV;
4380     _input_size_in_bits = EVEX_32bit;
4381   }
4382   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4383 }
4384 
4385 // Integer vector arithmetic
4386 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4387   assert((VM_Version::supports_avx() && (vector_len == 0)) ||
4388          VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
4389   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
4390   emit_int8(0x01);
4391   emit_int8((unsigned char)(0xC0 | encode));
4392 }
4393 
4394 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4395   assert((VM_Version::supports_avx() && (vector_len == 0)) ||
4396          VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
4397   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
4398   emit_int8(0x02);
4399   emit_int8((unsigned char)(0xC0 | encode));
4400 }
4401 
4402 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
4403   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4404   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4405 }
4406 
4407 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
4408   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4409   emit_simd_arith(0xFD, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4410 }
4411 
4412 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
4413   _instruction_uses_vl = true;
4414   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4415   emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
4416 }
4417 
4418 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
4419   _instruction_uses_vl = true;
4420   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4421   if (VM_Version::supports_evex()) {
4422     emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66);
4423   } else {
4424     emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
4425   }
4426 }
4427 
4428 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
4429   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4430   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
4431                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
4432   emit_int8(0x01);
4433   emit_int8((unsigned char)(0xC0 | encode));
4434 }
4435 
4436 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
4437   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4438   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
4439                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
4440   emit_int8(0x02);
4441   emit_int8((unsigned char)(0xC0 | encode));
4442 }
4443 
4444 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4445   assert(UseAVX > 0, "requires some form of AVX");
4446   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4447 }
4448 
4449 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4450   assert(UseAVX > 0, "requires some form of AVX");
4451   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4452 }
4453 
4454 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4455   _instruction_uses_vl = true;
4456   assert(UseAVX > 0, "requires some form of AVX");
4457   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4458 }
4459 
4460 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4461   _instruction_uses_vl = true;
4462   assert(UseAVX > 0, "requires some form of AVX");
4463   if (VM_Version::supports_evex()) {
4464     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4465   } else {
4466     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4467   }
4468 }
4469 
4470 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4471   assert(UseAVX > 0, "requires some form of AVX");
4472   if (VM_Version::supports_evex()) {
4473     _tuple_type = EVEX_FVM;
4474   }
4475   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4476 }
4477 
4478 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4479   assert(UseAVX > 0, "requires some form of AVX");
4480   if (VM_Version::supports_evex()) {
4481     _tuple_type = EVEX_FVM;
4482   }
4483   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4484 }
4485 
4486 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4487   _instruction_uses_vl = true;
4488   assert(UseAVX > 0, "requires some form of AVX");
4489   if (VM_Version::supports_evex()) {
4490     _tuple_type = EVEX_FV;
4491     _input_size_in_bits = EVEX_32bit;
4492   }
4493   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4494 }
4495 
4496 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4497   _instruction_uses_vl = true;
4498   assert(UseAVX > 0, "requires some form of AVX");
4499   if (VM_Version::supports_evex()) {
4500     _tuple_type = EVEX_FV;
4501     _input_size_in_bits = EVEX_64bit;
4502     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4503   } else {
4504     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4505   }
4506 }
4507 
4508 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
4509   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4510   emit_simd_arith(0xF8, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4511 }
4512 
4513 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
4514   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4515   emit_simd_arith(0xF9, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4516 }
4517 
4518 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
4519   _instruction_uses_vl = true;
4520   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4521   emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
4522 }
4523 
4524 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
4525   _instruction_uses_vl = true;
4526   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4527   if (VM_Version::supports_evex()) {
4528     emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66);
4529   } else {
4530     emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
4531   }
4532 }
4533 
4534 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4535   assert(UseAVX > 0, "requires some form of AVX");
4536   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4537 }
4538 
4539 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4540   assert(UseAVX > 0, "requires some form of AVX");
4541   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4542 }
4543 
4544 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4545   _instruction_uses_vl = true;
4546   assert(UseAVX > 0, "requires some form of AVX");
4547   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4548 }
4549 
4550 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4551   _instruction_uses_vl = true;
4552   assert(UseAVX > 0, "requires some form of AVX");
4553   if (VM_Version::supports_evex()) {
4554     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4555   } else {
4556     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4557   }
4558 }
4559 
4560 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4561   assert(UseAVX > 0, "requires some form of AVX");
4562   if (VM_Version::supports_evex()) {
4563     _tuple_type = EVEX_FVM;
4564   }
4565   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4566 }
4567 
4568 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4569   assert(UseAVX > 0, "requires some form of AVX");
4570   if (VM_Version::supports_evex()) {
4571     _tuple_type = EVEX_FVM;
4572   }
4573   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4574 }
4575 
4576 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4577   _instruction_uses_vl = true;
4578   assert(UseAVX > 0, "requires some form of AVX");
4579   if (VM_Version::supports_evex()) {
4580     _tuple_type = EVEX_FV;
4581     _input_size_in_bits = EVEX_32bit;
4582   }
4583   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4584 }
4585 
4586 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4587   _instruction_uses_vl = true;
4588   assert(UseAVX > 0, "requires some form of AVX");
4589   if (VM_Version::supports_evex()) {
4590     _tuple_type = EVEX_FV;
4591     _input_size_in_bits = EVEX_64bit;
4592     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4593   } else {
4594     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4595   }
4596 }
4597 
4598 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
4599   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4600   emit_simd_arith(0xD5, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4601 }
4602 
4603 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
4604   _instruction_uses_vl = true;
4605   assert(VM_Version::supports_sse4_1(), "");
4606   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66,
4607                                       /* no_mask_reg */ false, VEX_OPCODE_0F_38);
4608   emit_int8(0x40);
4609   emit_int8((unsigned char)(0xC0 | encode));
4610 }
4611 
4612 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4613   assert(UseAVX > 0, "requires some form of AVX");
4614   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4615 }
4616 
4617 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4618   _instruction_uses_vl = true;
4619   assert(UseAVX > 0, "requires some form of AVX");
4620   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
4621   emit_int8(0x40);
4622   emit_int8((unsigned char)(0xC0 | encode));
4623 }
4624 
4625 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4626   assert(UseAVX > 2, "requires some form of EVEX");
4627   int src_enc = src->encoding();
4628   int dst_enc = dst->encoding();
4629   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4630   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38,
4631                                      /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false);
4632   emit_int8(0x40);
4633   emit_int8((unsigned char)(0xC0 | encode));
4634 }
4635 
4636 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4637   assert(UseAVX > 0, "requires some form of AVX");
4638   if (VM_Version::supports_evex()) {
4639     _tuple_type = EVEX_FVM;
4640   }
4641   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4642 }
4643 
4644 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4645   _instruction_uses_vl = true;
4646   assert(UseAVX > 0, "requires some form of AVX");
4647   if (VM_Version::supports_evex()) {
4648     _tuple_type = EVEX_FV;
4649     _input_size_in_bits = EVEX_32bit;
4650   }
4651   InstructionMark im(this);
4652   int dst_enc = dst->encoding();
4653   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4654   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
4655              VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
4656   emit_int8(0x40);
4657   emit_operand(dst, src);
4658 }
4659 
4660 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4661   assert(UseAVX > 2, "requires some form of EVEX");
4662   if (VM_Version::supports_evex()) {
4663     _tuple_type = EVEX_FV;
4664     _input_size_in_bits = EVEX_64bit;
4665   }
4666   InstructionMark im(this);
4667   int dst_enc = dst->encoding();
4668   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4669   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
4670              VEX_OPCODE_0F_38, /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq);
4671   emit_int8(0x40);
4672   emit_operand(dst, src);
4673 }
4674 
4675 // Shift packed integers left by specified number of bits.
4676 void Assembler::psllw(XMMRegister dst, int shift) {
4677   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4678   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4679   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F,
4680                                       /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
4681   emit_int8(0x71);
4682   emit_int8((unsigned char)(0xC0 | encode));
4683   emit_int8(shift & 0xFF);
4684 }
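// Hand-worked sketch of the bytes produced above (illustrative only):
//   psllw xmm1, 3  ->  66 0F 71 F1 03
// The ModRM byte 0xF1 = 0xC0 | (6 << 3) | 1: xmm6's register number 6 supplies
// the /6 opcode extension in the reg field; it is not a real operand.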
4685 
4686 void Assembler::pslld(XMMRegister dst, int shift) {
4687   _instruction_uses_vl = true;
4688   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4689   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4690   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
4691   emit_int8(0x72);
4692   emit_int8((unsigned char)(0xC0 | encode));
4693   emit_int8(shift & 0xFF);
4694 }
4695 
4696 void Assembler::psllq(XMMRegister dst, int shift) {
4697   _instruction_uses_vl = true;
4698   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4699   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4700   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ true);
4701   emit_int8(0x73);
4702   emit_int8((unsigned char)(0xC0 | encode));
4703   emit_int8(shift & 0xFF);
4704 }
4705 
4706 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
4707   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4708   emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4709 }
4710 
4711 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
4712   _instruction_uses_vl = true;
4713   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4714   emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
4715 }
4716 
4717 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
4718   _instruction_uses_vl = true;
4719   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4720   if (VM_Version::supports_evex()) {
4721     emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66);
4722   } else {
4723     emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
4724   }
4725 }
4726 
4727 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4728   assert(UseAVX > 0, "requires some form of AVX");
4729   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4730   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4731   emit_int8(shift & 0xFF);
4732 }
4733 
4734 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4735   _instruction_uses_vl = true;
4736   assert(UseAVX > 0, "requires some form of AVX");
4737   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4738   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len);
4739   emit_int8(shift & 0xFF);
4740 }
4741 
4742 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4743   _instruction_uses_vl = true;
4744   assert(UseAVX > 0, "requires some form of AVX");
4745   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4746   if (VM_Version::supports_evex()) {
4747     emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4748   } else {
4749     emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4750   }
4751   emit_int8(shift & 0xFF);
4752 }
4753 
4754 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4755   assert(UseAVX > 0, "requires some form of AVX");
4756   emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4757 }
4758 
4759 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4760   _instruction_uses_vl = true;
4761   assert(UseAVX > 0, "requires some form of AVX");
4762   emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len);
4763 }
4764 
4765 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4766   _instruction_uses_vl = true;
4767   assert(UseAVX > 0, "requires some form of AVX");
4768   if (VM_Version::supports_evex()) {
4769     emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4770   } else {
4771     emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4772   }
4773 }
4774 
4775 // Shift packed integers logically right by specified number of bits.
4776 void Assembler::psrlw(XMMRegister dst, int shift) {
4777   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4778   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4779   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
4780                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
4781   emit_int8(0x71);
4782   emit_int8((unsigned char)(0xC0 | encode));
4783   emit_int8(shift & 0xFF);
4784 }
4785 
4786 void Assembler::psrld(XMMRegister dst, int shift) {
4787   _instruction_uses_vl = true;
4788   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4789   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4790   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
4791   emit_int8(0x72);
4792   emit_int8((unsigned char)(0xC0 | encode));
4793   emit_int8(shift & 0xFF);
4794 }
4795 
4796 void Assembler::psrlq(XMMRegister dst, int shift) {
4797   _instruction_uses_vl = true;
4798   // Do not confuse it with the psrldq SSE2 instruction, which shifts
4799   // the whole 128-bit value in an xmm register right by a number of bytes.
4800   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4801   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4802   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
4803                                       VEX_OPCODE_0F, /* rex_w */ VM_Version::supports_evex());
4804   emit_int8(0x73);
4805   emit_int8((unsigned char)(0xC0 | encode));
4806   emit_int8(shift & 0xFF);
4807 }
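// Illustration of the psrlq/psrldq distinction noted above: if each 64-bit
// lane of xmm0 holds 0x0000000000000100, psrlq xmm0, 8 shifts each lane right
// by 8 bits, leaving 0x0000000000000001 per lane; psrldq xmm0, 8 instead
// shifts the entire 128-bit register right by 8 bytes, moving the high qword
// into the low qword and zeroing the high one.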
4808 
4809 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
4810   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4811   emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4812 }
4813 
4814 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
4815   _instruction_uses_vl = true;
4816   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4817   emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
4818 }
4819 
4820 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
4821   _instruction_uses_vl = true;
4822   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4823   if (VM_Version::supports_evex()) {
4824     emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
4825   } else {
4826     emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
4827   }
4828 }
4829 
4830 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4831   assert(UseAVX > 0, "requires some form of AVX");
4832   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4833   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4834   emit_int8(shift & 0xFF);
4835 }
4836 
4837 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4838   _instruction_uses_vl = true;
4839   assert(UseAVX > 0, "requires some form of AVX");
4840   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4841   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
4842   emit_int8(shift & 0xFF);
4843 }
4844 
4845 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4846   _instruction_uses_vl = true;
4847   assert(UseAVX > 0, "requires some form of AVX");
4848   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4849   if (VM_Version::supports_evex()) {
4850     emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4851   } else {
4852     emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4853   }
4854   emit_int8(shift & 0xFF);
4855 }
4856 
4857 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4858   assert(UseAVX > 0, "requires some form of AVX");
4859   emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4860 }
4861 
4862 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4863   _instruction_uses_vl = true;
4864   assert(UseAVX > 0, "requires some form of AVX");
4865   emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
4866 }
4867 
4868 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4869   _instruction_uses_vl = true;
4870   assert(UseAVX > 0, "requires some form of AVX");
4871   if (VM_Version::supports_evex()) {
4872     emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4873   } else {
4874     emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4875   }
4876 }
4877 
4878 // Shift packed integers arithmetically right by specified number of bits.
4879 void Assembler::psraw(XMMRegister dst, int shift) {
4880   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4881   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4882   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
4883                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
4884   emit_int8(0x71);
4885   emit_int8((unsigned char)(0xC0 | encode));
4886   emit_int8(shift & 0xFF);
4887 }
4888 
4889 void Assembler::psrad(XMMRegister dst, int shift) {
4890   _instruction_uses_vl = true;
4891   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4892   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4893   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
4894   emit_int8(0x72);
4895   emit_int8((unsigned char)(0xC0 | encode));
4896   emit_int8(shift & 0xFF);
4897 }
4898 
4899 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
4900   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4901   emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4902 }
4903 
4904 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
4905   _instruction_uses_vl = true;
4906   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4907   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
4908 }
4909 
4910 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4911   assert(UseAVX > 0, "requires some form of AVX");
4912   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4913   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4914   emit_int8(shift & 0xFF);
4915 }
4916 
4917 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4918   _instruction_uses_vl = true;
4919   assert(UseAVX > 0, "requires some form of AVX");
4920   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4921   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
4922   emit_int8(shift & 0xFF);
4923 }
4924 
4925 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4926   assert(UseAVX > 0, "requires some form of AVX");
4927   emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4928 }
4929 
4930 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4931   _instruction_uses_vl = true;
4932   assert(UseAVX > 0, "requires some form of AVX");
4933   emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len);
4934 }
4935 
4936 
4937 // logical operations packed integers
4938 void Assembler::pand(XMMRegister dst, XMMRegister src) {
4939   _instruction_uses_vl = true;
4940   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4941   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
4942 }
4943 
4944 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4945   _instruction_uses_vl = true;
4946   assert(UseAVX > 0, "requires some form of AVX");
4947   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
4948 }
4949 
4950 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4951   _instruction_uses_vl = true;
4952   assert(UseAVX > 0, "requires some form of AVX");
4953   if (VM_Version::supports_evex()) {
4954     _tuple_type = EVEX_FV;
4955     _input_size_in_bits = EVEX_32bit;
4956   }
4957   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
4958 }
4959 
4960 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
4961   _instruction_uses_vl = true;
4962   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4963   if (VM_Version::supports_evex()) {
4964     emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66);
4965   } else {
4966     emit_simd_arith(0xDF, dst, src, VEX_SIMD_66);
4967   }
4969 }
4970 
4971 void Assembler::por(XMMRegister dst, XMMRegister src) {
4972   _instruction_uses_vl = true;
4973   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4974   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
4975 }
4976 
4977 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4978   _instruction_uses_vl = true;
4979   assert(UseAVX > 0, "requires some form of AVX");
4980   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
4981 }
4982 
4983 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4984   _instruction_uses_vl = true;
4985   assert(UseAVX > 0, "requires some form of AVX");
4986   if (VM_Version::supports_evex()) {
4987     _tuple_type = EVEX_FV;
4988     _input_size_in_bits = EVEX_32bit;
4989   }
4990   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
4991 }
4992 
4993 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
4994   _instruction_uses_vl = true;
4995   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4996   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
4997 }
4998 
4999 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5000   _instruction_uses_vl = true;
5001   assert(UseAVX > 0, "requires some form of AVX");
5002   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
5003 }
5004 
5005 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5006   _instruction_uses_vl = true;
5007   assert(UseAVX > 0, "requires some form of AVX");
5008   if (VM_Version::supports_evex()) {
5009     _tuple_type = EVEX_FV;
5010     _input_size_in_bits = EVEX_32bit;
5011   }
5012   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
5013 }
5014 
5015 
5016 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5017   assert(VM_Version::supports_avx(), "");
5018   int vector_len = AVX_256bit;
5019   if (VM_Version::supports_evex()) {
5020     vector_len = AVX_512bit;
5021   }
5022   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5023   emit_int8(0x18);
5024   emit_int8((unsigned char)(0xC0 | encode));
5025   // 0x00 - insert into lower 128 bits
5026   // 0x01 - insert into upper 128 bits
5027   emit_int8(0x01);
5028 }
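// Hand-checked encoding sketch for the fixed imm8 above (AVX form,
// illustrative only):
//   vinsertf128 ymm0, ymm1, xmm2, 0x01  ->  C4 E3 75 18 C2 01
// C4 E3 = 3-byte VEX selecting the 0F_3A map, 0x75 = W0 | vvvv(~ymm1) |
// L(256-bit) | pp(66), 0x18 = opcode, 0xC2 = ModRM(ymm0, xmm2), 0x01 = imm8.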
5029 
5030 void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5031   assert(VM_Version::supports_evex(), "");
5032   int vector_len = AVX_512bit;
5033   int src_enc = src->encoding();
5034   int dst_enc = dst->encoding();
5035   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5036   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5037                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5038   emit_int8(0x1A);
5039   emit_int8((unsigned char)(0xC0 | encode));
5040   // 0x00 - insert into lower 256 bits
5041   // 0x01 - insert into upper 256 bits
5042   emit_int8(0x01);
5043 }
5044 
5045 void Assembler::vinsertf64x4h(XMMRegister dst, Address src) {
5046   assert(VM_Version::supports_evex(), "");
5047   _tuple_type = EVEX_T4;
5048   _input_size_in_bits = EVEX_64bit;
5049   InstructionMark im(this);
5050   int vector_len = AVX_512bit;
5051   assert(dst != xnoreg, "sanity");
5052   int dst_enc = dst->encoding();
5053   // swap src<->dst for encoding
5054   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ true, vector_len);
5055   emit_int8(0x1A);
5056   emit_operand(dst, src);
5057   // 0x01 - insert into upper 256 bits
5058   emit_int8(0x01);
5059 }
5060 
5061 void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
5062   assert(VM_Version::supports_evex(), "");
5063   int vector_len = AVX_512bit;
5064   int src_enc = src->encoding();
5065   int dst_enc = dst->encoding();
5066   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5067   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5068                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5069   emit_int8(0x18);
5070   emit_int8((unsigned char)(0xC0 | encode));
5071   // 0x00 - insert into q0 128 bits (0..127)
5072   // 0x01 - insert into q1 128 bits (128..255)
5073   // 0x02 - insert into q2 128 bits (256..383)
5074   // 0x03 - insert into q3 128 bits (384..511)
5075   emit_int8(value & 0x3);
5076 }
5077 
5078 void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) {
5079   assert(VM_Version::supports_evex(), "");
5080   _tuple_type = EVEX_T4;
5081   _input_size_in_bits = EVEX_32bit;
5082   InstructionMark im(this);
5083   int vector_len = AVX_512bit;
5084   assert(dst != xnoreg, "sanity");
5085   int dst_enc = dst->encoding();
5086   // swap src<->dst for encoding
5087   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5088   emit_int8(0x18);
5089   emit_operand(dst, src);
5090   // 0x00 - insert into q0 128 bits (0..127)
5091   // 0x01 - insert into q1 128 bits (128..255)
5092   // 0x02 - insert into q2 128 bits (256..383)
5093   // 0x03 - insert into q3 128 bits (384..511)
5094   emit_int8(value & 0x3);
5095 }
5096 
5097 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
5098   assert(VM_Version::supports_avx(), "");
5099   int vector_len = AVX_256bit;
5100   if (VM_Version::supports_evex()) {
5101     _tuple_type = EVEX_T4;
5102     _input_size_in_bits = EVEX_32bit;
5103     vector_len = AVX_512bit;
5104   }
5105   InstructionMark im(this);
5106   assert(dst != xnoreg, "sanity");
5107   int dst_enc = dst->encoding();
5108   // swap src<->dst for encoding
5109   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5110   emit_int8(0x18);
5111   emit_operand(dst, src);
5112   // 0x01 - insert into upper 128 bits
5113   emit_int8(0x01);
5114 }
5115 
5116 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
5117   assert(VM_Version::supports_avx(), "");
5118   int vector_len = AVX_256bit;
5119   if (VM_Version::supports_evex()) {
5120     vector_len = AVX_512bit;
5121   }
5122   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5123   emit_int8(0x19);
5124   emit_int8((unsigned char)(0xC0 | encode));
5125   // 0x00 - extract from lower 128 bits
5126   // 0x01 - extract from upper 128 bits
5127   emit_int8(0x01);
5128 }
5129 
5130 void Assembler::vextractf128h(Address dst, XMMRegister src) {
5131   assert(VM_Version::supports_avx(), "");
5132   int vector_len = AVX_256bit;
5133   if (VM_Version::supports_evex()) {
5134     _tuple_type = EVEX_T4;
5135     _input_size_in_bits = EVEX_32bit;
5136     vector_len = AVX_512bit;
5137   }
5138   InstructionMark im(this);
5139   assert(src != xnoreg, "sanity");
5140   int src_enc = src->encoding();
5141   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5142   emit_int8(0x19);
5143   emit_operand(src, dst);
5144   // 0x01 - extract from upper 128 bits
5145   emit_int8(0x01);
5146 }
5147 
5148 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5149   assert(VM_Version::supports_avx2(), "");
5150   int vector_len = AVX_256bit;
5151   if (VM_Version::supports_evex()) {
5152     vector_len = AVX_512bit;
5153   }
5154   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5155   emit_int8(0x38);
5156   emit_int8((unsigned char)(0xC0 | encode));
5157   // 0x00 - insert into lower 128 bits
5158   // 0x01 - insert into upper 128 bits
5159   emit_int8(0x01);
5160 }
5161 
5162 void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5163   assert(VM_Version::supports_evex(), "");
5164   int vector_len = AVX_512bit;
5165   int src_enc = src->encoding();
5166   int dst_enc = dst->encoding();
5167   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5168   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5169                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5170   emit_int8(0x38);
5171   emit_int8((unsigned char)(0xC0 | encode));
5172   // 0x00 - insert into lower 256 bits
5173   // 0x01 - insert into upper 256 bits
5174   emit_int8(0x01);
5175 }
5176 
5177 void Assembler::vinserti128h(XMMRegister dst, Address src) {
5178   assert(VM_Version::supports_avx2(), "");
5179   int vector_len = AVX_256bit;
5180   if (VM_Version::supports_evex()) {
5181     _tuple_type = EVEX_T4;
5182     _input_size_in_bits = EVEX_32bit;
5183     vector_len = AVX_512bit;
5184   }
5185   InstructionMark im(this);
5186   assert(dst != xnoreg, "sanity");
5187   int dst_enc = dst->encoding();
5188   // swap src<->dst for encoding
5189   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5190   emit_int8(0x38);
5191   emit_operand(dst, src);
5192   // 0x01 - insert into upper 128 bits
5193   emit_int8(0x01);
5194 }
5195 
5196 void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
5197   assert(VM_Version::supports_avx2(), "");
5198   int vector_len = AVX_256bit;
5199   if (VM_Version::supports_evex()) {
5200     vector_len = AVX_512bit;
5201   }
5202   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5203   emit_int8(0x39);
5204   emit_int8((unsigned char)(0xC0 | encode));
5205   // 0x00 - extract from lower 128 bits
5206   // 0x01 - extract from upper 128 bits
5207   emit_int8(0x01);
5208 }
5209 
5210 void Assembler::vextracti128h(Address dst, XMMRegister src) {
5211   assert(VM_Version::supports_avx2(), "");
5212   int vector_len = AVX_256bit;
5213   if (VM_Version::supports_evex()) {
5214     _tuple_type = EVEX_T4;
5215     _input_size_in_bits = EVEX_32bit;
5216     vector_len = AVX_512bit;
5217   }
5218   InstructionMark im(this);
5219   assert(src != xnoreg, "sanity");
5220   int src_enc = src->encoding();
5221   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5222   emit_int8(0x39);
5223   emit_operand(src, dst);
5224   // 0x01 - extract from upper 128 bits
5225   emit_int8(0x01);
5226 }
5227 
5228 void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) {
5229   assert(VM_Version::supports_evex(), "");
5230   int vector_len = AVX_512bit;
5231   int src_enc = src->encoding();
5232   int dst_enc = dst->encoding();
5233   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5234                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5235   emit_int8(0x3B);
5236   emit_int8((unsigned char)(0xC0 | encode));
5237   // 0x01 - extract from upper 256 bits
5238   emit_int8(0x01);
5239 }
5240 
5241 void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
5242   assert(VM_Version::supports_evex(), "");
5243   int vector_len = AVX_512bit;
5244   int src_enc = src->encoding();
5245   int dst_enc = dst->encoding();
5246   int encode;
5247   if (VM_Version::supports_avx512dq()) {
5248     encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5249                                    /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5250   } else {
5251     encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5252                                    /* vex_w */ false, vector_len, /* legacy_mode */ true, /* no_mask_reg */ false);
5253   }
5254   emit_int8(0x39);
5255   emit_int8((unsigned char)(0xC0 | encode));
5256   // 0x01 - extract from bits 255:128
5257   // 0x02 - extract from bits 383:256
5258   // 0x03 - extract from bits 511:384
5259   emit_int8(value & 0x3);
5260 }
5261 
5262 void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) {
5263   assert(VM_Version::supports_evex(), "");
5264   int vector_len = AVX_512bit;
5265   int src_enc = src->encoding();
5266   int dst_enc = dst->encoding();
5267   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5268                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5269   emit_int8(0x1B);
5270   emit_int8((unsigned char)(0xC0 | encode));
5271   // 0x01 - extract from upper 256 bits
5272   emit_int8(0x01);
5273 }
5274 
5275 void Assembler::vextractf64x4h(Address dst, XMMRegister src) {
5276   assert(VM_Version::supports_evex(), "");
5277   _tuple_type = EVEX_T4;
5278   _input_size_in_bits = EVEX_64bit;
5279   InstructionMark im(this);
5280   int vector_len = AVX_512bit;
5281   assert(src != xnoreg, "sanity");
5282   int src_enc = src->encoding();
5283   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5284              /* vex_w */ true, vector_len);
5285   emit_int8(0x1B);
5286   emit_operand(src, dst);
5287   // 0x01 - extract from upper 256 bits
5288   emit_int8(0x01);
5289 }
5290 
5291 void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
5292   assert(VM_Version::supports_evex(), "");
5293   int vector_len = AVX_512bit;
5294   int src_enc = src->encoding();
5295   int dst_enc = dst->encoding();
5296   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5297                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5298   emit_int8(0x19);
5299   emit_int8((unsigned char)(0xC0 | encode));
5300   // 0x00 - extract from bits 127:0
5301   // 0x01 - extract from bits 255:128
5302   // 0x02 - extract from bits 383:256
5303   // 0x03 - extract from bits 511:384
5304   emit_int8(value & 0x3);
5305 }
5306 
5307 void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
5308   assert(VM_Version::supports_evex(), "");
5309   _tuple_type = EVEX_T4;
5310   _input_size_in_bits = EVEX_32bit;
5311   InstructionMark im(this);
5312   int vector_len = AVX_512bit;
5313   assert(src != xnoreg, "sanity");
5314   int src_enc = src->encoding();
5315   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5316   emit_int8(0x19);
5317   emit_operand(src, dst);
5318   // 0x00 - extract from bits 127:0
5319   // 0x01 - extract from bits 255:128
5320   // 0x02 - extract from bits 383:256
5321   // 0x03 - extract from bits 511:384
5322   emit_int8(value & 0x3);
5323 }
5324 
5325 void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
5326   assert(VM_Version::supports_evex(), "");
5327   int vector_len = AVX_512bit;
5328   int src_enc = src->encoding();
5329   int dst_enc = dst->encoding();
5330   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5331                                      /* vex_w */ !_legacy_mode_dq, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5332   emit_int8(0x19);
5333   emit_int8((unsigned char)(0xC0 | encode));
5334   // 0x01 - extract from bits 255:128
5335   // 0x02 - extract from bits 383:256
5336   // 0x03 - extract from bits 511:384
5337   emit_int8(value & 0x3);
5338 }
5339 
5340 // duplicate 4-byte integer data from src into 8 locations in dest
5341 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
5342   _instruction_uses_vl = true;
5343   assert(UseAVX > 1, "");
5344   int vector_len = AVX_256bit;
5345   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5346   emit_int8(0x58);
5347   emit_int8((unsigned char)(0xC0 | encode));
5348 }
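// Hand-checked sketch of the emitted bytes (illustrative only):
//   vpbroadcastd ymm0, xmm1  ->  C4 E2 7D 58 C1
// a 3-byte VEX selecting the 0F_38 map with pp = 66 and L = 256-bit, opcode
// 0x58, replicating the low dword of xmm1 into all eight dword lanes of ymm0.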
5349 
5350 // duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL
5351 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
5352   _instruction_uses_vl = true;
5353   assert(UseAVX > 1, "");
5354   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5355   emit_int8(0x78);
5356   emit_int8((unsigned char)(0xC0 | encode));
5357 }
5358 
5359 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
5360   _instruction_uses_vl = true;
5361   assert(UseAVX > 1, "");
5362   _tuple_type = EVEX_T1S;
5363   _input_size_in_bits = EVEX_8bit;
5364   InstructionMark im(this);
5365   assert(dst != xnoreg, "sanity");
5366   int dst_enc = dst->encoding();
5367   // swap src<->dst for encoding
5368   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5369   emit_int8(0x78);
5370   emit_operand(dst, src);
5371 }
5372 
5373 // duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL
5374 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
5375   _instruction_uses_vl = true;
5376   assert(UseAVX > 1, "");
5377   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5378   emit_int8(0x79);
5379   emit_int8((unsigned char)(0xC0 | encode));
5380 }
5381 
5382 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
5383   _instruction_uses_vl = true;
5384   assert(UseAVX > 1, "");
5385   _tuple_type = EVEX_T1S;
5386   _input_size_in_bits = EVEX_16bit;
5387   InstructionMark im(this);
5388   assert(dst != xnoreg, "sanity");
5389   int dst_enc = dst->encoding();
5390   // swap src<->dst for encoding
5391   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5392   emit_int8(0x79);
5393   emit_operand(dst, src);
5394 }
5395 
5396 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
5397 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
5398   _instruction_uses_vl = true;
5399   assert(UseAVX > 1, "");
5400   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5401   emit_int8(0x58);
5402   emit_int8((unsigned char)(0xC0 | encode));
5403 }
5404 
5405 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
5406   _instruction_uses_vl = true;
5407   assert(UseAVX > 1, "");
5408   _tuple_type = EVEX_T1S;
5409   _input_size_in_bits = EVEX_32bit;
5410   InstructionMark im(this);
5411   assert(dst != xnoreg, "sanity");
5412   int dst_enc = dst->encoding();
5413   // swap src<->dst for encoding
5414   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5415   emit_int8(0x58);
5416   emit_operand(dst, src);
5417 }
5418 
5419 // duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL
5420 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
5421   _instruction_uses_vl = true;
5422   assert(UseAVX > 1, "");
5423   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5424                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5425   emit_int8(0x59);
5426   emit_int8((unsigned char)(0xC0 | encode));
5427 }
5428 
5429 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
5430   _instruction_uses_vl = true;
5431   assert(UseAVX > 1, "");
5432   _tuple_type = EVEX_T1S;
5433   _input_size_in_bits = EVEX_64bit;
5434   InstructionMark im(this);
5435   assert(dst != xnoreg, "sanity");
5436   int dst_enc = dst->encoding();
5437   // swap src<->dst for encoding
5438   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
5439   emit_int8(0x59);
5440   emit_operand(dst, src);
5441 }
5442 
5443 // duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL
5444 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
5445   _instruction_uses_vl = true;
5446   assert(UseAVX > 1, "");
5447   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5448                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5449   emit_int8(0x18);
5450   emit_int8((unsigned char)(0xC0 | encode));
5451 }
5452 
5453 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
5454   assert(UseAVX > 1, "");
5455   _tuple_type = EVEX_T1S;
5456   _input_size_in_bits = EVEX_32bit;
5457   InstructionMark im(this);
5458   assert(dst != xnoreg, "sanity");
5459   int dst_enc = dst->encoding();
5460   // swap src<->dst for encoding
5461   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5462   emit_int8(0x18);
5463   emit_operand(dst, src);
5464 }
5465 
5466 // duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL
5467 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
5468   _instruction_uses_vl = true;
5469   assert(UseAVX > 1, "");
5470   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5471                                      /*vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5472   emit_int8(0x19);
5473   emit_int8((unsigned char)(0xC0 | encode));
5474 }
5475 
5476 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
5477   _instruction_uses_vl = true;
5478   assert(UseAVX > 1, "");
5479   _tuple_type = EVEX_T1S;
5480   _input_size_in_bits = EVEX_64bit;
5481   InstructionMark im(this);
5482   assert(dst != xnoreg, "sanity");
5483   int dst_enc = dst->encoding();
5484   // swap src<->dst for encoding
5485   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
5486   emit_int8(0x19);
5487   emit_operand(dst, src);
5488 }
5489 
5490 // duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL
5491 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
5492   _instruction_uses_vl = true;
5493   assert(VM_Version::supports_evex(), "");
5494   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5495                                      /*vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5496   emit_int8(0x7A);
5497   emit_int8((unsigned char)(0xC0 | encode));
5498 }
5499 
5500 // duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL
5501 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
5502   _instruction_uses_vl = true;
5503   assert(VM_Version::supports_evex(), "");
5504   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5505                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5506   emit_int8(0x7B);
5507   emit_int8((unsigned char)(0xC0 | encode));
5508 }
5509 
5510 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
5511 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
5512   _instruction_uses_vl = true;
5513   assert(VM_Version::supports_evex(), "");
5514   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5515                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5516   emit_int8(0x7C);
5517   emit_int8((unsigned char)(0xC0 | encode));
5518 }
5519 
5520 // duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL
5521 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
5522   _instruction_uses_vl = true;
5523   assert(VM_Version::supports_evex(), "");
5524   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5525                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5526   emit_int8(0x7C);
5527   emit_int8((unsigned char)(0xC0 | encode));
5528 }
5529 
5530 // Carry-Less Multiplication Quadword
5531 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
5532   assert(VM_Version::supports_clmul(), "");
5533   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
5534                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
5535   emit_int8(0x44);
5536   emit_int8((unsigned char)(0xC0 | encode));
5537   emit_int8((unsigned char)mask);
5538 }
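// The imm8 mask selects which quadword of each operand enters the carry-less
// (GF(2)[x]) multiply: bit 0 picks the qword of dst, bit 4 the qword of src.
// For example (illustrative): mask 0x00 multiplies the two low qwords, while
// mask 0x11 multiplies the two high qwords. The same applies to the VEX form
// below.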
5539 
5540 // Carry-Less Multiplication Quadword
5541 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
5542   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
5543   int vector_len = AVX_128bit;
5544   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* legacy_mode */ true);
5545   emit_int8(0x44);
5546   emit_int8((unsigned char)(0xC0 | encode));
5547   emit_int8((unsigned char)mask);
5548 }
5549 
5550 void Assembler::vzeroupper() {
5551   assert(VM_Version::supports_avx(), "");
5552   if (UseAVX < 3) {
5554     (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
5555     emit_int8(0x77);
5556   }
5557 }
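// For reference, the sequence above assembles to the canonical vzeroupper
// encoding C5 F8 77. Treat the UseAVX < 3 guard's rationale (the EVEX
// configurations targeted here are assumed not to need the SSE/AVX
// transition fix-up) as an assumption of this note, not a documented
// contract.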
5558 
5559 
5560 #ifndef _LP64
5561 // 32bit only pieces of the assembler
5562 
5563 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
5564   // NO PREFIX AS NEVER 64BIT
5565   InstructionMark im(this);
5566   emit_int8((unsigned char)0x81);
5567   emit_int8((unsigned char)(0xF8 | src1->encoding()));
5568   emit_data(imm32, rspec, 0);
5569 }
5570 
5571 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
5572   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
5573   InstructionMark im(this);
5574   emit_int8((unsigned char)0x81);
5575   emit_operand(rdi, src1);
5576   emit_data(imm32, rspec, 0);
5577 }
5578 
5579 // The 64-bit cmpxchg (cmpxchg8b) on a 32-bit platform compares the value at adr
5580 // with the contents of rdx:rax and, if equal, stores rcx:rbx into adr; otherwise,
5581 // the value at adr is loaded into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
5582 void Assembler::cmpxchg8(Address adr) {
5583   InstructionMark im(this);
5584   emit_int8(0x0F);
5585   emit_int8((unsigned char)0xC7);
5586   emit_operand(rcx, adr);
5587 }
5588 
5589 void Assembler::decl(Register dst) {
5590   // Don't use it directly. Use MacroAssembler::decrementl() instead.
5591   emit_int8(0x48 | dst->encoding());
5592 }
5593 
5594 #endif // _LP64
5595 
5596 // 64-bit code typically doesn't use the x87, but still needs it for the trig functions
5597 
5598 void Assembler::fabs() {
5599   emit_int8((unsigned char)0xD9);
5600   emit_int8((unsigned char)0xE1);
5601 }
5602 
5603 void Assembler::fadd(int i) {
5604   emit_farith(0xD8, 0xC0, i);
5605 }
5606 
5607 void Assembler::fadd_d(Address src) {
5608   InstructionMark im(this);
5609   emit_int8((unsigned char)0xDC);
5610   emit_operand32(rax, src);
5611 }
5612 
5613 void Assembler::fadd_s(Address src) {
5614   InstructionMark im(this);
5615   emit_int8((unsigned char)0xD8);
5616   emit_operand32(rax, src);
5617 }
5618 
5619 void Assembler::fadda(int i) {
5620   emit_farith(0xDC, 0xC0, i);
5621 }
5622 
5623 void Assembler::faddp(int i) {
5624   emit_farith(0xDE, 0xC0, i);
5625 }
5626 
5627 void Assembler::fchs() {
5628   emit_int8((unsigned char)0xD9);
5629   emit_int8((unsigned char)0xE0);
5630 }
5631 
5632 void Assembler::fcom(int i) {
5633   emit_farith(0xD8, 0xD0, i);
5634 }
5635 
5636 void Assembler::fcomp(int i) {
5637   emit_farith(0xD8, 0xD8, i);
5638 }
5639 
5640 void Assembler::fcomp_d(Address src) {
5641   InstructionMark im(this);
5642   emit_int8((unsigned char)0xDC);
5643   emit_operand32(rbx, src);
5644 }
5645 
5646 void Assembler::fcomp_s(Address src) {
5647   InstructionMark im(this);
5648   emit_int8((unsigned char)0xD8);
5649   emit_operand32(rbx, src);
5650 }
5651 
5652 void Assembler::fcompp() {
5653   emit_int8((unsigned char)0xDE);
5654   emit_int8((unsigned char)0xD9);
5655 }
5656 
5657 void Assembler::fcos() {
5658   emit_int8((unsigned char)0xD9);
5659   emit_int8((unsigned char)0xFF);
5660 }
5661 
5662 void Assembler::fdecstp() {
5663   emit_int8((unsigned char)0xD9);
5664   emit_int8((unsigned char)0xF6);
5665 }
5666 
5667 void Assembler::fdiv(int i) {
5668   emit_farith(0xD8, 0xF0, i);
5669 }
5670 
5671 void Assembler::fdiv_d(Address src) {
5672   InstructionMark im(this);
5673   emit_int8((unsigned char)0xDC);
5674   emit_operand32(rsi, src);
5675 }
5676 
5677 void Assembler::fdiv_s(Address src) {
5678   InstructionMark im(this);
5679   emit_int8((unsigned char)0xD8);
5680   emit_operand32(rsi, src);
5681 }
5682 
5683 void Assembler::fdiva(int i) {
5684   emit_farith(0xDC, 0xF8, i);
5685 }
5686 
5687 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
5688 //       is erroneous for some of the floating-point instructions below.
5689 
5690 void Assembler::fdivp(int i) {
5691   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
5692 }
5693 
5694 void Assembler::fdivr(int i) {
5695   emit_farith(0xD8, 0xF8, i);
5696 }
5697 
5698 void Assembler::fdivr_d(Address src) {
5699   InstructionMark im(this);
5700   emit_int8((unsigned char)0xDC);
5701   emit_operand32(rdi, src);
5702 }
5703 
5704 void Assembler::fdivr_s(Address src) {
5705   InstructionMark im(this);
5706   emit_int8((unsigned char)0xD8);
5707   emit_operand32(rdi, src);
5708 }
5709 
5710 void Assembler::fdivra(int i) {
5711   emit_farith(0xDC, 0xF0, i);
5712 }
5713 
5714 void Assembler::fdivrp(int i) {
5715   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
5716 }
5717 
5718 void Assembler::ffree(int i) {
5719   emit_farith(0xDD, 0xC0, i);
5720 }
5721 
5722 void Assembler::fild_d(Address adr) {
5723   InstructionMark im(this);
5724   emit_int8((unsigned char)0xDF);
5725   emit_operand32(rbp, adr);
5726 }
5727 
5728 void Assembler::fild_s(Address adr) {
5729   InstructionMark im(this);
5730   emit_int8((unsigned char)0xDB);
5731   emit_operand32(rax, adr);
5732 }
5733 
5734 void Assembler::fincstp() {
5735   emit_int8((unsigned char)0xD9);
5736   emit_int8((unsigned char)0xF7);
5737 }
5738 
5739 void Assembler::finit() {
5740   emit_int8((unsigned char)0x9B);
5741   emit_int8((unsigned char)0xDB);
5742   emit_int8((unsigned char)0xE3);
5743 }
5744 
5745 void Assembler::fist_s(Address adr) {
5746   InstructionMark im(this);
5747   emit_int8((unsigned char)0xDB);
5748   emit_operand32(rdx, adr);
5749 }
5750 
5751 void Assembler::fistp_d(Address adr) {
5752   InstructionMark im(this);
5753   emit_int8((unsigned char)0xDF);
5754   emit_operand32(rdi, adr);
5755 }
5756 
5757 void Assembler::fistp_s(Address adr) {
5758   InstructionMark im(this);
5759   emit_int8((unsigned char)0xDB);
5760   emit_operand32(rbx, adr);
5761 }
5762 
5763 void Assembler::fld1() {
5764   emit_int8((unsigned char)0xD9);
5765   emit_int8((unsigned char)0xE8);
5766 }
5767 
5768 void Assembler::fld_d(Address adr) {
5769   InstructionMark im(this);
5770   emit_int8((unsigned char)0xDD);
5771   emit_operand32(rax, adr);
5772 }
5773 
5774 void Assembler::fld_s(Address adr) {
5775   InstructionMark im(this);
5776   emit_int8((unsigned char)0xD9);
5777   emit_operand32(rax, adr);
5778 }
5779 
5780 
5781 void Assembler::fld_s(int index) {
5782   emit_farith(0xD9, 0xC0, index);
5783 }
5784 
5785 void Assembler::fld_x(Address adr) {
5786   InstructionMark im(this);
5787   emit_int8((unsigned char)0xDB);
5788   emit_operand32(rbp, adr);
5789 }
5790 
5791 void Assembler::fldcw(Address src) {
5792   InstructionMark im(this);
5793   emit_int8((unsigned char)0xD9);
5794   emit_operand32(rbp, src);
5795 }
5796 
5797 void Assembler::fldenv(Address src) {
5798   InstructionMark im(this);
5799   emit_int8((unsigned char)0xD9);
5800   emit_operand32(rsp, src);
5801 }
5802 
5803 void Assembler::fldlg2() {
5804   emit_int8((unsigned char)0xD9);
5805   emit_int8((unsigned char)0xEC);
5806 }
5807 
5808 void Assembler::fldln2() {
5809   emit_int8((unsigned char)0xD9);
5810   emit_int8((unsigned char)0xED);
5811 }
5812 
5813 void Assembler::fldz() {
5814   emit_int8((unsigned char)0xD9);
5815   emit_int8((unsigned char)0xEE);
5816 }
5817 
5818 void Assembler::flog() {
5819   fldln2();
5820   fxch();
5821   fyl2x();
5822 }
5823 
5824 void Assembler::flog10() {
5825   fldlg2();
5826   fxch();
5827   fyl2x();
5828 }
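// Worked math behind the two composites above: fyl2x computes
// ST(1) * log2(ST(0)) and pops. flog pushes ln(2) (fldln2) and fxch moves the
// argument back to ST(0), so the result is ln(2) * log2(x) = ln(x); flog10
// pushes log10(2) instead, giving log10(2) * log2(x) = log10(x).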
5829 
5830 void Assembler::fmul(int i) {
5831   emit_farith(0xD8, 0xC8, i);
5832 }
5833 
5834 void Assembler::fmul_d(Address src) {
5835   InstructionMark im(this);
5836   emit_int8((unsigned char)0xDC);
5837   emit_operand32(rcx, src);
5838 }
5839 
5840 void Assembler::fmul_s(Address src) {
5841   InstructionMark im(this);
5842   emit_int8((unsigned char)0xD8);
5843   emit_operand32(rcx, src);
5844 }
5845 
5846 void Assembler::fmula(int i) {
5847   emit_farith(0xDC, 0xC8, i);
5848 }
5849 
5850 void Assembler::fmulp(int i) {
5851   emit_farith(0xDE, 0xC8, i);
5852 }
5853 
5854 void Assembler::fnsave(Address dst) {
5855   InstructionMark im(this);
5856   emit_int8((unsigned char)0xDD);
5857   emit_operand32(rsi, dst);
5858 }
5859 
5860 void Assembler::fnstcw(Address src) {
5861   InstructionMark im(this);
5862   emit_int8((unsigned char)0x9B);
5863   emit_int8((unsigned char)0xD9);
5864   emit_operand32(rdi, src);
5865 }
5866 
5867 void Assembler::fnstsw_ax() {
5868   emit_int8((unsigned char)0xDF);
5869   emit_int8((unsigned char)0xE0);
5870 }
5871 
5872 void Assembler::fprem() {
5873   emit_int8((unsigned char)0xD9);
5874   emit_int8((unsigned char)0xF8);
5875 }
5876 
5877 void Assembler::fprem1() {
5878   emit_int8((unsigned char)0xD9);
5879   emit_int8((unsigned char)0xF5);
5880 }
5881 
5882 void Assembler::frstor(Address src) {
5883   InstructionMark im(this);
5884   emit_int8((unsigned char)0xDD);
5885   emit_operand32(rsp, src);
5886 }
5887 
5888 void Assembler::fsin() {
5889   emit_int8((unsigned char)0xD9);
5890   emit_int8((unsigned char)0xFE);
5891 }
5892 
5893 void Assembler::fsqrt() {
5894   emit_int8((unsigned char)0xD9);
5895   emit_int8((unsigned char)0xFA);
5896 }
5897 
5898 void Assembler::fst_d(Address adr) {
5899   InstructionMark im(this);
5900   emit_int8((unsigned char)0xDD);
5901   emit_operand32(rdx, adr);
5902 }
5903 
5904 void Assembler::fst_s(Address adr) {
5905   InstructionMark im(this);
5906   emit_int8((unsigned char)0xD9);
5907   emit_operand32(rdx, adr);
5908 }
5909 
5910 void Assembler::fstp_d(Address adr) {
5911   InstructionMark im(this);
5912   emit_int8((unsigned char)0xDD);
5913   emit_operand32(rbx, adr);
5914 }
5915 
5916 void Assembler::fstp_d(int index) {
5917   emit_farith(0xDD, 0xD8, index);
5918 }
5919 
5920 void Assembler::fstp_s(Address adr) {
5921   InstructionMark im(this);
5922   emit_int8((unsigned char)0xD9);
5923   emit_operand32(rbx, adr);
5924 }
5925 
5926 void Assembler::fstp_x(Address adr) {
5927   InstructionMark im(this);
5928   emit_int8((unsigned char)0xDB);
5929   emit_operand32(rdi, adr);
5930 }
5931 
5932 void Assembler::fsub(int i) {
5933   emit_farith(0xD8, 0xE0, i);
5934 }
5935 
5936 void Assembler::fsub_d(Address src) {
5937   InstructionMark im(this);
5938   emit_int8((unsigned char)0xDC);
5939   emit_operand32(rsp, src);
5940 }
5941 
5942 void Assembler::fsub_s(Address src) {
5943   InstructionMark im(this);
5944   emit_int8((unsigned char)0xD8);
5945   emit_operand32(rsp, src);
5946 }
5947 
5948 void Assembler::fsuba(int i) {
5949   emit_farith(0xDC, 0xE8, i);
5950 }
5951 
5952 void Assembler::fsubp(int i) {
5953   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
5954 }
5955 
5956 void Assembler::fsubr(int i) {
5957   emit_farith(0xD8, 0xE8, i);
5958 }
5959 
5960 void Assembler::fsubr_d(Address src) {
5961   InstructionMark im(this);
5962   emit_int8((unsigned char)0xDC);
5963   emit_operand32(rbp, src);
5964 }
5965 
5966 void Assembler::fsubr_s(Address src) {
5967   InstructionMark im(this);
5968   emit_int8((unsigned char)0xD8);
5969   emit_operand32(rbp, src);
5970 }
5971 
5972 void Assembler::fsubra(int i) {
5973   emit_farith(0xDC, 0xE0, i);
5974 }
5975 
5976 void Assembler::fsubrp(int i) {
5977   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
5978 }
5979 
5980 void Assembler::ftan() {
5981   emit_int8((unsigned char)0xD9);
5982   emit_int8((unsigned char)0xF2);
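  // D9 F2 is fptan, which pushes a 1.0 after computing tan(x); the DD D8
  // below (fstp st(0)) pops that 1.0, leaving only tan(x) on the stack.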
5983   emit_int8((unsigned char)0xDD);
5984   emit_int8((unsigned char)0xD8);
5985 }
5986 
5987 void Assembler::ftst() {
5988   emit_int8((unsigned char)0xD9);
5989   emit_int8((unsigned char)0xE4);
5990 }
5991 
5992 void Assembler::fucomi(int i) {
5993   // make sure the instruction is supported (introduced for P6, together with cmov)
5994   guarantee(VM_Version::supports_cmov(), "illegal instruction");
5995   emit_farith(0xDB, 0xE8, i);
5996 }
5997 
5998 void Assembler::fucomip(int i) {
5999   // make sure the instruction is supported (introduced for P6, together with cmov)
6000   guarantee(VM_Version::supports_cmov(), "illegal instruction");
6001   emit_farith(0xDF, 0xE8, i);
6002 }
6003 
6004 void Assembler::fwait() {
6005   emit_int8((unsigned char)0x9B);
6006 }
6007 
6008 void Assembler::fxch(int i) {
6009   emit_farith(0xD9, 0xC8, i);
6010 }
6011 
6012 void Assembler::fyl2x() {
6013   emit_int8((unsigned char)0xD9);
6014   emit_int8((unsigned char)0xF1);
6015 }
6016 
6017 void Assembler::frndint() {
6018   emit_int8((unsigned char)0xD9);
6019   emit_int8((unsigned char)0xFC);
6020 }
6021 
6022 void Assembler::f2xm1() {
6023   emit_int8((unsigned char)0xD9);
6024   emit_int8((unsigned char)0xF0);
6025 }
6026 
6027 void Assembler::fldl2e() {
6028   emit_int8((unsigned char)0xD9);
6029   emit_int8((unsigned char)0xEA);
6030 }
6031 
6032 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
6033 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
6034 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
6035 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
6036 
6037 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
6038 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
6039   if (pre > 0) {
6040     emit_int8(simd_pre[pre]);
6041   }
6042   if (rex_w) {
6043     prefixq(adr, xreg);
6044   } else {
6045     prefix(adr, xreg);
6046   }
6047   if (opc > 0) {
6048     emit_int8(0x0F);
6049     int opc2 = simd_opc[opc];
6050     if (opc2 > 0) {
6051       emit_int8(opc2);
6052     }
6053   }
6054 }
6055 
6056 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
6057   if (pre > 0) {
6058     emit_int8(simd_pre[pre]);
6059   }
6060   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
6061                           prefix_and_encode(dst_enc, src_enc);
6062   if (opc > 0) {
6063     emit_int8(0x0F);
6064     int opc2 = simd_opc[opc];
6065     if (opc2 > 0) {
6066       emit_int8(opc2);
6067     }
6068   }
6069   return encode;
6070 }
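// Hand-worked example of the legacy path (illustrative only): addsd xmm8, xmm9
// arrives with pre = VEX_SIMD_F2 and opc = VEX_OPCODE_0F, and produces
//   F2 45 0F 58 C1
// 0xF2 from simd_pre, REX 0x45 (REX.R | REX.B for the two high registers) from
// prefix_and_encode, 0x0F from the opc > 0 branch (simd_opc[VEX_OPCODE_0F] is
// 0, so no second map byte); the caller then emits 0x58 plus the ModRM byte
// 0xC1 built from the returned encoding.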
6071 
6072 
6073 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) {
6074   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
6075     prefix(VEX_3bytes);
6076 
6077     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
6078     byte1 = (~byte1) & 0xE0;
6079     byte1 |= opc;
6080     emit_int8(byte1);
6081 
6082     int byte2 = ((~nds_enc) & 0xf) << 3;
6083     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
6084     emit_int8(byte2);
6085   } else {
6086     prefix(VEX_2bytes);
6087 
6088     int byte1 = vex_r ? VEX_R : 0;
6089     byte1 = (~byte1) & 0x80;
6090     byte1 |= ((~nds_enc) & 0xf) << 3;
6091     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
6092     emit_int8(byte1);
6093   }
6094 }
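// Hand-checked sketch of the 2-byte path above (illustrative only):
//   vpxor ymm0, ymm0, ymm0  ->  C5 FD EF C0
// byte1 0xFD packs ~R = 1, vvvv = 1111 (~ymm0), L = 1 (256-bit) and pp = 01
// (0x66). Only R, vvvv, L and pp fit in this form, which is why any of B, X,
// W, or an 0F_38/0F_3A opcode map forces the 3-byte branch above.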
6095 
6096 // This is a 4-byte encoding
6097 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
6098                             int nds_enc, VexSimdPrefix pre, VexOpcode opc,
6099                             bool is_extended_context, bool is_merge_context,
6100                             int vector_len, bool no_mask_reg) {
6101   // EVEX 0x62 prefix
6102   prefix(EVEX_4bytes);
6103   _evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0);
6104 
6105   // P0: byte 2, built as RXBR`00mm with the R/X/B/R' bits gathered
6106   // positive here and complemented (not'd) below
6107   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
6108   byte2 = (~byte2) & 0xF0;
  // confine the opc opcode extension (one of {0F, 0F_38, 0F_3A})
  // to the mm bits in the lower two bits
6111   byte2 |= opc;
6112   emit_int8(byte2);
6113 
6114   // P1: byte 3 as Wvvvv1pp
6115   int byte3 = ((~nds_enc) & 0xf) << 3;
6116   // p[10] is always 1
6117   byte3 |= EVEX_F;
6118   byte3 |= (vex_w & 1) << 7;
  // confine the pre opcode extension (one of {66, F3, F2})
  // to the pp bits in the lower two bits
6121   byte3 |= pre;
6122   emit_int8(byte3);
6123 
  // P2: byte 4 as zL'Lbv'aaa
  int byte4 = (no_mask_reg) ? 0 : 1; // the mask register goes in the low 3 bits as aaa (k1 is hard-coded for now)
  // EVEX.V' for extending EVEX.vvvv or VIDX
  byte4 |= (evex_v ? 0 : EVEX_V);
  // third is EVEX.b for broadcast actions
  byte4 |= (is_extended_context ? EVEX_Rb : 0);
  // fourth is EVEX.L'L for vector length: 0 is 128-bit, 1 is 256-bit, 2 is 512-bit (1024-bit is not yet supported)
  byte4 |= ((vector_len) & 0x3) << 5;
  // last is EVEX.z for zero/merge actions
  byte4 |= (is_merge_context ? EVEX_Z : 0);
6134   emit_int8(byte4);
6135 }
6136 
6137 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
6138                            VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
  bool vex_r = ((xreg_enc & 8) == 8);
6140   bool vex_b = adr.base_needs_rex();
6141   bool vex_x = adr.index_needs_rex();
6142   _avx_vector_len = vector_len;
6143 
  // if the vector-length (VL) extensions are unavailable, revert to AVX for vectors smaller than 512-bit
6145   if (_legacy_mode_vl && _instruction_uses_vl) {
6146     switch (vector_len) {
6147     case AVX_128bit:
6148     case AVX_256bit:
6149       legacy_mode = true;
6150       break;
6151     }
6152   }
6153 
  if ((UseAVX > 2) && (legacy_mode == false)) {
6156     bool evex_r = (xreg_enc >= 16);
6157     bool evex_v = (nds_enc >= 16);
6158     _is_evex_instruction = true;
6159     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
6160   } else {
6161     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
6162   }
6163   _instruction_uses_vl = false;
6164 }
6165 
6166 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
6167                                      bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) {
  bool vex_r = ((dst_enc & 8) == 8);
  bool vex_b = ((src_enc & 8) == 8);
6170   bool vex_x = false;
6171   _avx_vector_len = vector_len;
6172 
  // if the vector-length (VL) extensions are unavailable, revert to AVX for vectors smaller than 512-bit
6174   if (_legacy_mode_vl && _instruction_uses_vl) {
6175     switch (vector_len) {
6176     case AVX_128bit:
6177     case AVX_256bit:
6178       legacy_mode = true;
6179       break;
6180     }
6181   }
6182 
  if ((UseAVX > 2) && (legacy_mode == false)) {
6185     bool evex_r = (dst_enc >= 16);
6186     bool evex_v = (nds_enc >= 16);
6187     // can use vex_x as bank extender on rm encoding
6188     vex_x = (src_enc >= 16);
6189     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
6190   } else {
6191     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
6192   }
6193 
6194   _instruction_uses_vl = false;
6195 
6196   // return modrm byte components for operands
6197   return (((dst_enc & 7) << 3) | (src_enc & 7));
6198 }
6199 
6200 
6201 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
6202                             bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
6203   if (UseAVX > 0) {
6204     int xreg_enc = xreg->encoding();
6205     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
6206     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
6207   } else {
6208     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
6209     rex_prefix(adr, xreg, pre, opc, rex_w);
6210   }
6211 }
6212 
6213 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
6214                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
6215   int dst_enc = dst->encoding();
6216   int src_enc = src->encoding();
6217   if (UseAVX > 0) {
6218     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6219     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
6220   } else {
6221     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
6222     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
6223   }
6224 }
6225 
6226 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
6227                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
6228   int dst_enc = dst->encoding();
6229   int src_enc = src->encoding();
6230   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6231   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
6232 }
6233 
6234 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
6235                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
6236   int dst_enc = dst->encoding();
6237   int src_enc = src->encoding();
6238   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6239   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
6240 }
6241 
6242 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
6243   InstructionMark im(this);
6244   simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
6245   emit_int8(opcode);
6246   emit_operand(dst, src);
6247 }
6248 
6249 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
6250   InstructionMark im(this);
6251   simd_prefix_q(dst, dst, src, pre, no_mask_reg);
6252   emit_int8(opcode);
6253   emit_operand(dst, src);
6254 }
6255 
6256 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
6257   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
6258   emit_int8(opcode);
6259   emit_int8((unsigned char)(0xC0 | encode));
6260 }
6261 
6262 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
6263   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
6264   emit_int8(opcode);
6265   emit_int8((unsigned char)(0xC0 | encode));
6266 }
6267 
6268 // Versions with no second source register (non-destructive source).
6269 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
6270   InstructionMark im(this);
6271   simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
6272   emit_int8(opcode);
6273   emit_operand(dst, src);
6274 }
6275 
6276 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
6277   InstructionMark im(this);
6278   simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
6279   emit_int8(opcode);
6280   emit_operand(dst, src);
6281 }
6282 
6283 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
6284   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
6285   emit_int8(opcode);
6286   emit_int8((unsigned char)(0xC0 | encode));
6287 }
6288 
6289 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
6290   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true);
6291   emit_int8(opcode);
6292   emit_int8((unsigned char)(0xC0 | encode));
6293 }
6294 
6295 // 3-operands AVX instructions
6296 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src,
6297                                VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
6298   InstructionMark im(this);
6299   vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode);
6300   emit_int8(opcode);
6301   emit_operand(dst, src);
6302 }
6303 
6304 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
6305                                  Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
6306   InstructionMark im(this);
6307   vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
6308   emit_int8(opcode);
6309   emit_operand(dst, src);
6310 }
6311 
6312 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
6313                                VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
6314   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, legacy_mode, no_mask_reg);
6315   emit_int8(opcode);
6316   emit_int8((unsigned char)(0xC0 | encode));
6317 }
6318 
6319 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
6320                                  VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
6321   int src_enc = src->encoding();
6322   int dst_enc = dst->encoding();
6323   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6324   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
6325   emit_int8(opcode);
6326   emit_int8((unsigned char)(0xC0 | encode));
6327 }
6328 
6329 #ifndef _LP64
6330 
6331 void Assembler::incl(Register dst) {
6332   // Don't use it directly. Use MacroAssembler::incrementl() instead.
6333   emit_int8(0x40 | dst->encoding());
6334 }
6335 
6336 void Assembler::lea(Register dst, Address src) {
6337   leal(dst, src);
6338 }
6339 
6340 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
6341   InstructionMark im(this);
6342   emit_int8((unsigned char)0xC7);
6343   emit_operand(rax, dst);
6344   emit_data((int)imm32, rspec, 0);
6345 }
6346 
6347 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
6348   InstructionMark im(this);
6349   int encode = prefix_and_encode(dst->encoding());
6350   emit_int8((unsigned char)(0xB8 | encode));
6351   emit_data((int)imm32, rspec, 0);
6352 }
6353 
6354 void Assembler::popa() { // 32bit
6355   emit_int8(0x61);
6356 }
6357 
6358 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
6359   InstructionMark im(this);
6360   emit_int8(0x68);
6361   emit_data(imm32, rspec, 0);
6362 }
6363 
6364 void Assembler::pusha() { // 32bit
6365   emit_int8(0x60);
6366 }
6367 
6368 void Assembler::set_byte_if_not_zero(Register dst) {
6369   emit_int8(0x0F);
6370   emit_int8((unsigned char)0x95);
6371   emit_int8((unsigned char)(0xE0 | dst->encoding()));
6372 }
6373 
6374 void Assembler::shldl(Register dst, Register src) {
6375   emit_int8(0x0F);
6376   emit_int8((unsigned char)0xA5);
6377   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6378 }
6379 
6380 // 0F A4 / r ib
6381 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
6382   emit_int8(0x0F);
6383   emit_int8((unsigned char)0xA4);
6384   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6385   emit_int8(imm8);
6386 }
6387 
6388 void Assembler::shrdl(Register dst, Register src) {
6389   emit_int8(0x0F);
6390   emit_int8((unsigned char)0xAD);
6391   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6392 }
6393 
6394 #else // LP64
6395 
6396 void Assembler::set_byte_if_not_zero(Register dst) {
6397   int enc = prefix_and_encode(dst->encoding(), true);
6398   emit_int8(0x0F);
6399   emit_int8((unsigned char)0x95);
6400   emit_int8((unsigned char)(0xE0 | enc));
6401 }
6402 
6403 // 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// addressing; it cannot be used by instructions that want an immediate value.
6406 
6407 bool Assembler::reachable(AddressLiteral adr) {
6408   int64_t disp;
  // A relocInfo::none reloc will force a 64bit literal into the code stream.
  // It is likely a placeholder for something that will be patched later, and
  // we need to be certain it will always be reachable.
6412   if (adr.reloc() == relocInfo::none) {
6413     return false;
6414   }
6415   if (adr.reloc() == relocInfo::internal_word_type) {
6416     // This should be rip relative and easily reachable.
6417     return true;
6418   }
6419   if (adr.reloc() == relocInfo::virtual_call_type ||
6420       adr.reloc() == relocInfo::opt_virtual_call_type ||
6421       adr.reloc() == relocInfo::static_call_type ||
6422       adr.reloc() == relocInfo::static_stub_type ) {
6423     // This should be rip relative within the code cache and easily
6424     // reachable until we get huge code caches. (At which point
6425     // ic code is going to have issues).
6426     return true;
6427   }
6428   if (adr.reloc() != relocInfo::external_word_type &&
6429       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
6430       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
6431       adr.reloc() != relocInfo::runtime_call_type ) {
6432     return false;
6433   }
6434 
6435   // Stress the correction code
6436   if (ForceUnreachable) {
    // Must be a runtime_call reloc; see if it is in the code cache.
    // Flipping things in the code cache to be unreachable causes issues
    // with inline caches, where the additional instructions are not handled.
6441     if (CodeCache::find_blob(adr._target) == NULL) {
6442       return false;
6443     }
6444   }
  // For external_word_type/runtime_call_type, if the target is reachable both
  // from where we are now (possibly a temp buffer) and from anywhere we might
  // end up in the code cache, then we are always reachable.
  // This would have to become more pessimistic if we ever save/restore
  // shared code.
6450   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
6451   if (!is_simm32(disp)) return false;
6452   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
6453   if (!is_simm32(disp)) return false;
6454 
6455   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
6456 
  // Because a rip-relative target is disp + address_of_next_instruction, and
  // we don't know address_of_next_instruction yet, we apply a fudge factor to
  // make sure we will be ok no matter the size of the instruction we end up in.
  // We don't have to fudge the checks above because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, 4-byte disp, 4-byte literal;
  // + 4 because better safe than sorry.
6464   const int fudge = 12 + 4;
6465   if (disp < 0) {
6466     disp -= fudge;
6467   } else {
6468     disp += fudge;
6469   }
6470   return is_simm32(disp);
6471 }
6472 
// Returns true if the polling page is not reachable from the code cache using
// rip-relative addressing.
6475 bool Assembler::is_polling_page_far() {
6476   intptr_t addr = (intptr_t)os::get_polling_page();
6477   return ForceUnreachable ||
6478          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
6479          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
6480 }
6481 
6482 void Assembler::emit_data64(jlong data,
6483                             relocInfo::relocType rtype,
6484                             int format) {
6485   if (rtype == relocInfo::none) {
6486     emit_int64(data);
6487   } else {
6488     emit_data64(data, Relocation::spec_simple(rtype), format);
6489   }
6490 }
6491 
6492 void Assembler::emit_data64(jlong data,
6493                             RelocationHolder const& rspec,
6494                             int format) {
6495   assert(imm_operand == 0, "default format must be immediate in this file");
6496   assert(imm_operand == format, "must be immediate");
6497   assert(inst_mark() != NULL, "must be inside InstructionMark");
6498   // Do not use AbstractAssembler::relocate, which is not intended for
6499   // embedded words.  Instead, relocate to the enclosing instruction.
6500   code_section()->relocate(inst_mark(), rspec, format);
6501 #ifdef ASSERT
6502   check_relocation(rspec, format);
6503 #endif
6504   emit_int64(data);
6505 }
6506 
6507 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
6508   if (reg_enc >= 8) {
6509     prefix(REX_B);
6510     reg_enc -= 8;
6511   } else if (byteinst && reg_enc >= 4) {
6512     prefix(REX);
6513   }
6514   return reg_enc;
6515 }
6516 
6517 int Assembler::prefixq_and_encode(int reg_enc) {
6518   if (reg_enc < 8) {
6519     prefix(REX_W);
6520   } else {
6521     prefix(REX_WB);
6522     reg_enc -= 8;
6523   }
6524   return reg_enc;
6525 }
6526 
6527 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
6528   if (dst_enc < 8) {
6529     if (src_enc >= 8) {
6530       prefix(REX_B);
6531       src_enc -= 8;
6532     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
6533       prefix(REX);
6534     }
6535   } else {
6536     if (src_enc < 8) {
6537       prefix(REX_R);
6538     } else {
6539       prefix(REX_RB);
6540       src_enc -= 8;
6541     }
6542     dst_enc -= 8;
6543   }
6544   return dst_enc << 3 | src_enc;
6545 }
6546 
6547 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
6548   if (dst_enc < 8) {
6549     if (src_enc < 8) {
6550       prefix(REX_W);
6551     } else {
6552       prefix(REX_WB);
6553       src_enc -= 8;
6554     }
6555   } else {
6556     if (src_enc < 8) {
6557       prefix(REX_WR);
6558     } else {
6559       prefix(REX_WRB);
6560       src_enc -= 8;
6561     }
6562     dst_enc -= 8;
6563   }
6564   return dst_enc << 3 | src_enc;
6565 }
6566 
6567 void Assembler::prefix(Register reg) {
6568   if (reg->encoding() >= 8) {
6569     prefix(REX_B);
6570   }
6571 }
6572 
6573 void Assembler::prefix(Register dst, Register src, Prefix p) {
6574   if (src->encoding() >= 8) {
6575     p = (Prefix)(p | REX_B);
6576   }
6577   if (dst->encoding() >= 8) {
6578     p = (Prefix)( p | REX_R);
6579   }
6580   if (p != Prefix_EMPTY) {
6581     // do not generate an empty prefix
6582     prefix(p);
6583   }
6584 }
6585 
6586 void Assembler::prefix(Register dst, Address adr, Prefix p) {
6587   if (adr.base_needs_rex()) {
6588     if (adr.index_needs_rex()) {
6589       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
6590     } else {
6591       prefix(REX_B);
6592     }
6593   } else {
6594     if (adr.index_needs_rex()) {
6595       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
6596     }
6597   }
6598   if (dst->encoding() >= 8) {
6599     p = (Prefix)(p | REX_R);
6600   }
6601   if (p != Prefix_EMPTY) {
6602     // do not generate an empty prefix
6603     prefix(p);
6604   }
6605 }
6606 
6607 void Assembler::prefix(Address adr) {
6608   if (adr.base_needs_rex()) {
6609     if (adr.index_needs_rex()) {
6610       prefix(REX_XB);
6611     } else {
6612       prefix(REX_B);
6613     }
6614   } else {
6615     if (adr.index_needs_rex()) {
6616       prefix(REX_X);
6617     }
6618   }
6619 }
6620 
6621 void Assembler::prefixq(Address adr) {
6622   if (adr.base_needs_rex()) {
6623     if (adr.index_needs_rex()) {
6624       prefix(REX_WXB);
6625     } else {
6626       prefix(REX_WB);
6627     }
6628   } else {
6629     if (adr.index_needs_rex()) {
6630       prefix(REX_WX);
6631     } else {
6632       prefix(REX_W);
6633     }
6634   }
6635 }
6636 
6637 
6638 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
6639   if (reg->encoding() < 8) {
6640     if (adr.base_needs_rex()) {
6641       if (adr.index_needs_rex()) {
6642         prefix(REX_XB);
6643       } else {
6644         prefix(REX_B);
6645       }
6646     } else {
6647       if (adr.index_needs_rex()) {
6648         prefix(REX_X);
6649       } else if (byteinst && reg->encoding() >= 4 ) {
6650         prefix(REX);
6651       }
6652     }
6653   } else {
6654     if (adr.base_needs_rex()) {
6655       if (adr.index_needs_rex()) {
6656         prefix(REX_RXB);
6657       } else {
6658         prefix(REX_RB);
6659       }
6660     } else {
6661       if (adr.index_needs_rex()) {
6662         prefix(REX_RX);
6663       } else {
6664         prefix(REX_R);
6665       }
6666     }
6667   }
6668 }
6669 
6670 void Assembler::prefixq(Address adr, Register src) {
6671   if (src->encoding() < 8) {
6672     if (adr.base_needs_rex()) {
6673       if (adr.index_needs_rex()) {
6674         prefix(REX_WXB);
6675       } else {
6676         prefix(REX_WB);
6677       }
6678     } else {
6679       if (adr.index_needs_rex()) {
6680         prefix(REX_WX);
6681       } else {
6682         prefix(REX_W);
6683       }
6684     }
6685   } else {
6686     if (adr.base_needs_rex()) {
6687       if (adr.index_needs_rex()) {
6688         prefix(REX_WRXB);
6689       } else {
6690         prefix(REX_WRB);
6691       }
6692     } else {
6693       if (adr.index_needs_rex()) {
6694         prefix(REX_WRX);
6695       } else {
6696         prefix(REX_WR);
6697       }
6698     }
6699   }
6700 }
6701 
6702 void Assembler::prefix(Address adr, XMMRegister reg) {
6703   if (reg->encoding() < 8) {
6704     if (adr.base_needs_rex()) {
6705       if (adr.index_needs_rex()) {
6706         prefix(REX_XB);
6707       } else {
6708         prefix(REX_B);
6709       }
6710     } else {
6711       if (adr.index_needs_rex()) {
6712         prefix(REX_X);
6713       }
6714     }
6715   } else {
6716     if (adr.base_needs_rex()) {
6717       if (adr.index_needs_rex()) {
6718         prefix(REX_RXB);
6719       } else {
6720         prefix(REX_RB);
6721       }
6722     } else {
6723       if (adr.index_needs_rex()) {
6724         prefix(REX_RX);
6725       } else {
6726         prefix(REX_R);
6727       }
6728     }
6729   }
6730 }
6731 
6732 void Assembler::prefixq(Address adr, XMMRegister src) {
6733   if (src->encoding() < 8) {
6734     if (adr.base_needs_rex()) {
6735       if (adr.index_needs_rex()) {
6736         prefix(REX_WXB);
6737       } else {
6738         prefix(REX_WB);
6739       }
6740     } else {
6741       if (adr.index_needs_rex()) {
6742         prefix(REX_WX);
6743       } else {
6744         prefix(REX_W);
6745       }
6746     }
6747   } else {
6748     if (adr.base_needs_rex()) {
6749       if (adr.index_needs_rex()) {
6750         prefix(REX_WRXB);
6751       } else {
6752         prefix(REX_WRB);
6753       }
6754     } else {
6755       if (adr.index_needs_rex()) {
6756         prefix(REX_WRX);
6757       } else {
6758         prefix(REX_WR);
6759       }
6760     }
6761   }
6762 }
6763 
6764 void Assembler::adcq(Register dst, int32_t imm32) {
6765   (void) prefixq_and_encode(dst->encoding());
6766   emit_arith(0x81, 0xD0, dst, imm32);
6767 }
6768 
6769 void Assembler::adcq(Register dst, Address src) {
6770   InstructionMark im(this);
6771   prefixq(src, dst);
6772   emit_int8(0x13);
6773   emit_operand(dst, src);
6774 }
6775 
6776 void Assembler::adcq(Register dst, Register src) {
6777   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6778   emit_arith(0x13, 0xC0, dst, src);
6779 }
6780 
6781 void Assembler::addq(Address dst, int32_t imm32) {
6782   InstructionMark im(this);
6783   prefixq(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
6785 }
6786 
6787 void Assembler::addq(Address dst, Register src) {
6788   InstructionMark im(this);
6789   prefixq(dst, src);
6790   emit_int8(0x01);
6791   emit_operand(src, dst);
6792 }
6793 
6794 void Assembler::addq(Register dst, int32_t imm32) {
6795   (void) prefixq_and_encode(dst->encoding());
6796   emit_arith(0x81, 0xC0, dst, imm32);
6797 }
6798 
6799 void Assembler::addq(Register dst, Address src) {
6800   InstructionMark im(this);
6801   prefixq(src, dst);
6802   emit_int8(0x03);
6803   emit_operand(dst, src);
6804 }
6805 
6806 void Assembler::addq(Register dst, Register src) {
6807   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6808   emit_arith(0x03, 0xC0, dst, src);
6809 }
6810 
6811 void Assembler::adcxq(Register dst, Register src) {
6812   //assert(VM_Version::supports_adx(), "adx instructions not supported");
6813   emit_int8((unsigned char)0x66);
6814   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6815   emit_int8(0x0F);
6816   emit_int8(0x38);
6817   emit_int8((unsigned char)0xF6);
6818   emit_int8((unsigned char)(0xC0 | encode));
6819 }
6820 
6821 void Assembler::adoxq(Register dst, Register src) {
6822   //assert(VM_Version::supports_adx(), "adx instructions not supported");
6823   emit_int8((unsigned char)0xF3);
6824   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6825   emit_int8(0x0F);
6826   emit_int8(0x38);
6827   emit_int8((unsigned char)0xF6);
6828   emit_int8((unsigned char)(0xC0 | encode));
6829 }
6830 
6831 void Assembler::andq(Address dst, int32_t imm32) {
6832   InstructionMark im(this);
6833   prefixq(dst);
6834   emit_int8((unsigned char)0x81);
6835   emit_operand(rsp, dst, 4);
6836   emit_int32(imm32);
6837 }
6838 
6839 void Assembler::andq(Register dst, int32_t imm32) {
6840   (void) prefixq_and_encode(dst->encoding());
6841   emit_arith(0x81, 0xE0, dst, imm32);
6842 }
6843 
6844 void Assembler::andq(Register dst, Address src) {
6845   InstructionMark im(this);
6846   prefixq(src, dst);
6847   emit_int8(0x23);
6848   emit_operand(dst, src);
6849 }
6850 
6851 void Assembler::andq(Register dst, Register src) {
6852   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6853   emit_arith(0x23, 0xC0, dst, src);
6854 }
6855 
6856 void Assembler::andnq(Register dst, Register src1, Register src2) {
6857   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6858   int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2);
6859   emit_int8((unsigned char)0xF2);
6860   emit_int8((unsigned char)(0xC0 | encode));
6861 }
6862 
6863 void Assembler::andnq(Register dst, Register src1, Address src2) {
6864   InstructionMark im(this);
6865   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6866   vex_prefix_0F38_q_legacy(dst, src1, src2);
6867   emit_int8((unsigned char)0xF2);
6868   emit_operand(dst, src2);
6869 }
6870 
6871 void Assembler::bsfq(Register dst, Register src) {
6872   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6873   emit_int8(0x0F);
6874   emit_int8((unsigned char)0xBC);
6875   emit_int8((unsigned char)(0xC0 | encode));
6876 }
6877 
6878 void Assembler::bsrq(Register dst, Register src) {
6879   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6880   emit_int8(0x0F);
6881   emit_int8((unsigned char)0xBD);
6882   emit_int8((unsigned char)(0xC0 | encode));
6883 }
6884 
6885 void Assembler::bswapq(Register reg) {
6886   int encode = prefixq_and_encode(reg->encoding());
6887   emit_int8(0x0F);
6888   emit_int8((unsigned char)(0xC8 | encode));
6889 }
6890 
6891 void Assembler::blsiq(Register dst, Register src) {
6892   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6893   int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src);
6894   emit_int8((unsigned char)0xF3);
6895   emit_int8((unsigned char)(0xC0 | encode));
6896 }
6897 
6898 void Assembler::blsiq(Register dst, Address src) {
6899   InstructionMark im(this);
6900   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6901   vex_prefix_0F38_q_legacy(rbx, dst, src);
6902   emit_int8((unsigned char)0xF3);
6903   emit_operand(rbx, src);
6904 }
6905 
6906 void Assembler::blsmskq(Register dst, Register src) {
6907   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6908   int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src);
6909   emit_int8((unsigned char)0xF3);
6910   emit_int8((unsigned char)(0xC0 | encode));
6911 }
6912 
6913 void Assembler::blsmskq(Register dst, Address src) {
6914   InstructionMark im(this);
6915   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6916   vex_prefix_0F38_q_legacy(rdx, dst, src);
6917   emit_int8((unsigned char)0xF3);
6918   emit_operand(rdx, src);
6919 }
6920 
6921 void Assembler::blsrq(Register dst, Register src) {
6922   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6923   int encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src);
6924   emit_int8((unsigned char)0xF3);
6925   emit_int8((unsigned char)(0xC0 | encode));
6926 }
6927 
6928 void Assembler::blsrq(Register dst, Address src) {
6929   InstructionMark im(this);
6930   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6931   vex_prefix_0F38_q_legacy(rcx, dst, src);
6932   emit_int8((unsigned char)0xF3);
6933   emit_operand(rcx, src);
6934 }
6935 
6936 void Assembler::cdqq() {
6937   prefix(REX_W);
6938   emit_int8((unsigned char)0x99);
6939 }
6940 
6941 void Assembler::clflush(Address adr) {
6942   prefix(adr);
6943   emit_int8(0x0F);
6944   emit_int8((unsigned char)0xAE);
6945   emit_operand(rdi, adr);
6946 }
6947 
6948 void Assembler::cmovq(Condition cc, Register dst, Register src) {
6949   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6950   emit_int8(0x0F);
6951   emit_int8(0x40 | cc);
6952   emit_int8((unsigned char)(0xC0 | encode));
6953 }
6954 
6955 void Assembler::cmovq(Condition cc, Register dst, Address src) {
6956   InstructionMark im(this);
6957   prefixq(src, dst);
6958   emit_int8(0x0F);
6959   emit_int8(0x40 | cc);
6960   emit_operand(dst, src);
6961 }
6962 
6963 void Assembler::cmpq(Address dst, int32_t imm32) {
6964   InstructionMark im(this);
6965   prefixq(dst);
6966   emit_int8((unsigned char)0x81);
6967   emit_operand(rdi, dst, 4);
6968   emit_int32(imm32);
6969 }
6970 
6971 void Assembler::cmpq(Register dst, int32_t imm32) {
6972   (void) prefixq_and_encode(dst->encoding());
6973   emit_arith(0x81, 0xF8, dst, imm32);
6974 }
6975 
6976 void Assembler::cmpq(Address dst, Register src) {
6977   InstructionMark im(this);
6978   prefixq(dst, src);
6979   emit_int8(0x3B);
6980   emit_operand(src, dst);
6981 }
6982 
6983 void Assembler::cmpq(Register dst, Register src) {
6984   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6985   emit_arith(0x3B, 0xC0, dst, src);
6986 }
6987 
6988 void Assembler::cmpq(Register dst, Address  src) {
6989   InstructionMark im(this);
6990   prefixq(src, dst);
6991   emit_int8(0x3B);
6992   emit_operand(dst, src);
6993 }
6994 
6995 void Assembler::cmpxchgq(Register reg, Address adr) {
6996   InstructionMark im(this);
6997   prefixq(adr, reg);
6998   emit_int8(0x0F);
6999   emit_int8((unsigned char)0xB1);
7000   emit_operand(reg, adr);
7001 }
7002 
7003 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
7004   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7005   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
7006   emit_int8(0x2A);
7007   emit_int8((unsigned char)(0xC0 | encode));
7008 }
7009 
7010 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
7011   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7012   if (VM_Version::supports_evex()) {
7013     _tuple_type = EVEX_T1S;
7014     _input_size_in_bits = EVEX_32bit;
7015   }
7016   InstructionMark im(this);
7017   simd_prefix_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
7018   emit_int8(0x2A);
7019   emit_operand(dst, src);
7020 }
7021 
7022 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
7023   NOT_LP64(assert(VM_Version::supports_sse(), ""));
7024   if (VM_Version::supports_evex()) {
7025     _tuple_type = EVEX_T1S;
7026     _input_size_in_bits = EVEX_32bit;
7027   }
7028   InstructionMark im(this);
7029   simd_prefix_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
7030   emit_int8(0x2A);
7031   emit_operand(dst, src);
7032 }
7033 
7034 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
7035   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7036   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
7037   emit_int8(0x2C);
7038   emit_int8((unsigned char)(0xC0 | encode));
7039 }
7040 
7041 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
7042   NOT_LP64(assert(VM_Version::supports_sse(), ""));
7043   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
7044   emit_int8(0x2C);
7045   emit_int8((unsigned char)(0xC0 | encode));
7046 }
7047 
7048 void Assembler::decl(Register dst) {
7049   // Don't use it directly. Use MacroAssembler::decrementl() instead.
7050   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
7051   int encode = prefix_and_encode(dst->encoding());
7052   emit_int8((unsigned char)0xFF);
7053   emit_int8((unsigned char)(0xC8 | encode));
7054 }
7055 
7056 void Assembler::decq(Register dst) {
7057   // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
7059   int encode = prefixq_and_encode(dst->encoding());
7060   emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
7062 }
7063 
7064 void Assembler::decq(Address dst) {
7065   // Don't use it directly. Use MacroAssembler::decrementq() instead.
7066   InstructionMark im(this);
7067   prefixq(dst);
7068   emit_int8((unsigned char)0xFF);
7069   emit_operand(rcx, dst);
7070 }
7071 
7072 void Assembler::fxrstor(Address src) {
7073   prefixq(src);
7074   emit_int8(0x0F);
7075   emit_int8((unsigned char)0xAE);
7076   emit_operand(as_Register(1), src);
7077 }
7078 
7079 void Assembler::xrstor(Address src) {
7080   prefixq(src);
7081   emit_int8(0x0F);
7082   emit_int8((unsigned char)0xAE);
7083   emit_operand(as_Register(5), src);
7084 }
7085 
7086 void Assembler::fxsave(Address dst) {
7087   prefixq(dst);
7088   emit_int8(0x0F);
7089   emit_int8((unsigned char)0xAE);
7090   emit_operand(as_Register(0), dst);
7091 }
7092 
7093 void Assembler::xsave(Address dst) {
7094   prefixq(dst);
7095   emit_int8(0x0F);
7096   emit_int8((unsigned char)0xAE);
7097   emit_operand(as_Register(4), dst);
7098 }
7099 
7100 void Assembler::idivq(Register src) {
7101   int encode = prefixq_and_encode(src->encoding());
7102   emit_int8((unsigned char)0xF7);
7103   emit_int8((unsigned char)(0xF8 | encode));
7104 }
7105 
7106 void Assembler::imulq(Register dst, Register src) {
7107   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7108   emit_int8(0x0F);
7109   emit_int8((unsigned char)0xAF);
7110   emit_int8((unsigned char)(0xC0 | encode));
7111 }
7112 
7113 void Assembler::imulq(Register dst, Register src, int value) {
7114   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7115   if (is8bit(value)) {
7116     emit_int8(0x6B);
7117     emit_int8((unsigned char)(0xC0 | encode));
7118     emit_int8(value & 0xFF);
7119   } else {
7120     emit_int8(0x69);
7121     emit_int8((unsigned char)(0xC0 | encode));
7122     emit_int32(value);
7123   }
7124 }
7125 
7126 void Assembler::imulq(Register dst, Address src) {
7127   InstructionMark im(this);
7128   prefixq(src, dst);
7129   emit_int8(0x0F);
7130   emit_int8((unsigned char) 0xAF);
7131   emit_operand(dst, src);
7132 }
7133 
7134 void Assembler::incl(Register dst) {
7135   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
7137   int encode = prefix_and_encode(dst->encoding());
7138   emit_int8((unsigned char)0xFF);
7139   emit_int8((unsigned char)(0xC0 | encode));
7140 }
7141 
7142 void Assembler::incq(Register dst) {
7143   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
7145   int encode = prefixq_and_encode(dst->encoding());
7146   emit_int8((unsigned char)0xFF);
7147   emit_int8((unsigned char)(0xC0 | encode));
7148 }
7149 
7150 void Assembler::incq(Address dst) {
7151   // Don't use it directly. Use MacroAssembler::incrementq() instead.
7152   InstructionMark im(this);
7153   prefixq(dst);
7154   emit_int8((unsigned char)0xFF);
7155   emit_operand(rax, dst);
7156 }
7157 
7158 void Assembler::lea(Register dst, Address src) {
7159   leaq(dst, src);
7160 }
7161 
7162 void Assembler::leaq(Register dst, Address src) {
7163   InstructionMark im(this);
7164   prefixq(src, dst);
7165   emit_int8((unsigned char)0x8D);
7166   emit_operand(dst, src);
7167 }
7168 
7169 void Assembler::mov64(Register dst, int64_t imm64) {
7170   InstructionMark im(this);
7171   int encode = prefixq_and_encode(dst->encoding());
7172   emit_int8((unsigned char)(0xB8 | encode));
7173   emit_int64(imm64);
7174 }
7175 
7176 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
7177   InstructionMark im(this);
7178   int encode = prefixq_and_encode(dst->encoding());
7179   emit_int8(0xB8 | encode);
7180   emit_data64(imm64, rspec);
7181 }
7182 
7183 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
7184   InstructionMark im(this);
7185   int encode = prefix_and_encode(dst->encoding());
7186   emit_int8((unsigned char)(0xB8 | encode));
7187   emit_data((int)imm32, rspec, narrow_oop_operand);
7188 }
7189 
7190 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
7191   InstructionMark im(this);
7192   prefix(dst);
7193   emit_int8((unsigned char)0xC7);
7194   emit_operand(rax, dst, 4);
7195   emit_data((int)imm32, rspec, narrow_oop_operand);
7196 }
7197 
7198 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
7199   InstructionMark im(this);
7200   int encode = prefix_and_encode(src1->encoding());
7201   emit_int8((unsigned char)0x81);
7202   emit_int8((unsigned char)(0xF8 | encode));
7203   emit_data((int)imm32, rspec, narrow_oop_operand);
7204 }
7205 
7206 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
7207   InstructionMark im(this);
7208   prefix(src1);
7209   emit_int8((unsigned char)0x81);
7210   emit_operand(rax, src1, 4);
7211   emit_data((int)imm32, rspec, narrow_oop_operand);
7212 }
7213 
7214 void Assembler::lzcntq(Register dst, Register src) {
7215   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
7216   emit_int8((unsigned char)0xF3);
7217   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7218   emit_int8(0x0F);
7219   emit_int8((unsigned char)0xBD);
7220   emit_int8((unsigned char)(0xC0 | encode));
7221 }
7222 
7223 void Assembler::movdq(XMMRegister dst, Register src) {
7224   // table D-1 says MMX/SSE2
7225   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7226   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
7227   emit_int8(0x6E);
7228   emit_int8((unsigned char)(0xC0 | encode));
7229 }
7230 
7231 void Assembler::movdq(Register dst, XMMRegister src) {
7232   // table D-1 says MMX/SSE2
7233   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7234   // swap src/dst to get correct prefix
7235   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
7236   emit_int8(0x7E);
7237   emit_int8((unsigned char)(0xC0 | encode));
7238 }
7239 
7240 void Assembler::movq(Register dst, Register src) {
7241   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7242   emit_int8((unsigned char)0x8B);
7243   emit_int8((unsigned char)(0xC0 | encode));
7244 }
7245 
7246 void Assembler::movq(Register dst, Address src) {
7247   InstructionMark im(this);
7248   prefixq(src, dst);
7249   emit_int8((unsigned char)0x8B);
7250   emit_operand(dst, src);
7251 }
7252 
7253 void Assembler::movq(Address dst, Register src) {
7254   InstructionMark im(this);
7255   prefixq(dst, src);
7256   emit_int8((unsigned char)0x89);
7257   emit_operand(src, dst);
7258 }
7259 
7260 void Assembler::movsbq(Register dst, Address src) {
7261   InstructionMark im(this);
7262   prefixq(src, dst);
7263   emit_int8(0x0F);
7264   emit_int8((unsigned char)0xBE);
7265   emit_operand(dst, src);
7266 }
7267 
7268 void Assembler::movsbq(Register dst, Register src) {
7269   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7270   emit_int8(0x0F);
7271   emit_int8((unsigned char)0xBE);
7272   emit_int8((unsigned char)(0xC0 | encode));
7273 }
7274 
7275 void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3) as movl     $0x0000000048000000,(%rbx),
  // so we shouldn't use this form until it has been tested at runtime...
7279   ShouldNotReachHere();
7280   InstructionMark im(this);
7281   int encode = prefixq_and_encode(dst->encoding());
7282   emit_int8((unsigned char)(0xC7 | encode));
7283   emit_int32(imm32);
7284 }
7285 
7286 void Assembler::movslq(Address dst, int32_t imm32) {
7287   assert(is_simm32(imm32), "lost bits");
7288   InstructionMark im(this);
7289   prefixq(dst);
7290   emit_int8((unsigned char)0xC7);
7291   emit_operand(rax, dst, 4);
7292   emit_int32(imm32);
7293 }
7294 
7295 void Assembler::movslq(Register dst, Address src) {
7296   InstructionMark im(this);
7297   prefixq(src, dst);
7298   emit_int8(0x63);
7299   emit_operand(dst, src);
7300 }
7301 
7302 void Assembler::movslq(Register dst, Register src) {
7303   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7304   emit_int8(0x63);
7305   emit_int8((unsigned char)(0xC0 | encode));
7306 }
7307 
7308 void Assembler::movswq(Register dst, Address src) {
7309   InstructionMark im(this);
7310   prefixq(src, dst);
7311   emit_int8(0x0F);
7312   emit_int8((unsigned char)0xBF);
7313   emit_operand(dst, src);
7314 }
7315 
7316 void Assembler::movswq(Register dst, Register src) {
7317   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7318   emit_int8((unsigned char)0x0F);
7319   emit_int8((unsigned char)0xBF);
7320   emit_int8((unsigned char)(0xC0 | encode));
7321 }
7322 
7323 void Assembler::movzbq(Register dst, Address src) {
7324   InstructionMark im(this);
7325   prefixq(src, dst);
7326   emit_int8((unsigned char)0x0F);
7327   emit_int8((unsigned char)0xB6);
7328   emit_operand(dst, src);
7329 }
7330 
7331 void Assembler::movzbq(Register dst, Register src) {
7332   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7333   emit_int8(0x0F);
7334   emit_int8((unsigned char)0xB6);
  emit_int8((unsigned char)(0xC0 | encode));
7336 }
7337 
7338 void Assembler::movzwq(Register dst, Address src) {
7339   InstructionMark im(this);
7340   prefixq(src, dst);
7341   emit_int8((unsigned char)0x0F);
7342   emit_int8((unsigned char)0xB7);
7343   emit_operand(dst, src);
7344 }
7345 
7346 void Assembler::movzwq(Register dst, Register src) {
7347   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7348   emit_int8((unsigned char)0x0F);
7349   emit_int8((unsigned char)0xB7);
7350   emit_int8((unsigned char)(0xC0 | encode));
7351 }
7352 
7353 void Assembler::mulq(Address src) {
7354   InstructionMark im(this);
7355   prefixq(src);
7356   emit_int8((unsigned char)0xF7);
7357   emit_operand(rsp, src);
7358 }
7359 
7360 void Assembler::mulq(Register src) {
7361   int encode = prefixq_and_encode(src->encoding());
7362   emit_int8((unsigned char)0xF7);
7363   emit_int8((unsigned char)(0xE0 | encode));
7364 }
7365 
7366 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
7367   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
7368   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38,
7369                                     /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
7370   emit_int8((unsigned char)0xF6);
7371   emit_int8((unsigned char)(0xC0 | encode));
7372 }
7373 
7374 void Assembler::negq(Register dst) {
7375   int encode = prefixq_and_encode(dst->encoding());
7376   emit_int8((unsigned char)0xF7);
7377   emit_int8((unsigned char)(0xD8 | encode));
7378 }
7379 
7380 void Assembler::notq(Register dst) {
7381   int encode = prefixq_and_encode(dst->encoding());
7382   emit_int8((unsigned char)0xF7);
7383   emit_int8((unsigned char)(0xD0 | encode));
7384 }
7385 
7386 void Assembler::orq(Address dst, int32_t imm32) {
7387   InstructionMark im(this);
7388   prefixq(dst);
7389   emit_int8((unsigned char)0x81);
7390   emit_operand(rcx, dst, 4);
7391   emit_int32(imm32);
7392 }
7393 
7394 void Assembler::orq(Register dst, int32_t imm32) {
7395   (void) prefixq_and_encode(dst->encoding());
7396   emit_arith(0x81, 0xC8, dst, imm32);
7397 }
7398 
7399 void Assembler::orq(Register dst, Address src) {
7400   InstructionMark im(this);
7401   prefixq(src, dst);
7402   emit_int8(0x0B);
7403   emit_operand(dst, src);
7404 }
7405 
7406 void Assembler::orq(Register dst, Register src) {
7407   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7408   emit_arith(0x0B, 0xC0, dst, src);
7409 }
7410 
7411 void Assembler::popa() { // 64bit
7412   movq(r15, Address(rsp, 0));
7413   movq(r14, Address(rsp, wordSize));
7414   movq(r13, Address(rsp, 2 * wordSize));
7415   movq(r12, Address(rsp, 3 * wordSize));
7416   movq(r11, Address(rsp, 4 * wordSize));
7417   movq(r10, Address(rsp, 5 * wordSize));
7418   movq(r9,  Address(rsp, 6 * wordSize));
7419   movq(r8,  Address(rsp, 7 * wordSize));
7420   movq(rdi, Address(rsp, 8 * wordSize));
7421   movq(rsi, Address(rsp, 9 * wordSize));
7422   movq(rbp, Address(rsp, 10 * wordSize));
7423   // skip rsp
7424   movq(rbx, Address(rsp, 12 * wordSize));
7425   movq(rdx, Address(rsp, 13 * wordSize));
7426   movq(rcx, Address(rsp, 14 * wordSize));
7427   movq(rax, Address(rsp, 15 * wordSize));
7428 
7429   addq(rsp, 16 * wordSize);
7430 }
7431 
7432 void Assembler::popcntq(Register dst, Address src) {
7433   assert(VM_Version::supports_popcnt(), "must support");
7434   InstructionMark im(this);
7435   emit_int8((unsigned char)0xF3);
7436   prefixq(src, dst);
7437   emit_int8((unsigned char)0x0F);
7438   emit_int8((unsigned char)0xB8);
7439   emit_operand(dst, src);
7440 }
7441 
7442 void Assembler::popcntq(Register dst, Register src) {
7443   assert(VM_Version::supports_popcnt(), "must support");
7444   emit_int8((unsigned char)0xF3);
7445   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7446   emit_int8((unsigned char)0x0F);
7447   emit_int8((unsigned char)0xB8);
7448   emit_int8((unsigned char)(0xC0 | encode));
7449 }
7450 
7451 void Assembler::popq(Address dst) {
7452   InstructionMark im(this);
7453   prefixq(dst);
7454   emit_int8((unsigned char)0x8F);
7455   emit_operand(rax, dst);
7456 }
7457 
7458 void Assembler::pusha() { // 64bit
  // we have to store the original rsp.  The ABI says that the 128 bytes
  // below rsp are local scratch.
7461   movq(Address(rsp, -5 * wordSize), rsp);
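  // Note: old_rsp - 5*wordSize == (old_rsp - 16*wordSize) + 11*wordSize, so
  // after the subq below the saved rsp sits exactly in the slot the stores skip.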
7462 
7463   subq(rsp, 16 * wordSize);
7464 
7465   movq(Address(rsp, 15 * wordSize), rax);
7466   movq(Address(rsp, 14 * wordSize), rcx);
7467   movq(Address(rsp, 13 * wordSize), rdx);
7468   movq(Address(rsp, 12 * wordSize), rbx);
7469   // skip rsp
7470   movq(Address(rsp, 10 * wordSize), rbp);
7471   movq(Address(rsp, 9 * wordSize), rsi);
7472   movq(Address(rsp, 8 * wordSize), rdi);
7473   movq(Address(rsp, 7 * wordSize), r8);
7474   movq(Address(rsp, 6 * wordSize), r9);
7475   movq(Address(rsp, 5 * wordSize), r10);
7476   movq(Address(rsp, 4 * wordSize), r11);
7477   movq(Address(rsp, 3 * wordSize), r12);
7478   movq(Address(rsp, 2 * wordSize), r13);
7479   movq(Address(rsp, wordSize), r14);
7480   movq(Address(rsp, 0), r15);
7481 }
7482 
7483 void Assembler::pushq(Address src) {
7484   InstructionMark im(this);
7485   prefixq(src);
7486   emit_int8((unsigned char)0xFF);
7487   emit_operand(rsi, src);
7488 }
7489 
7490 void Assembler::rclq(Register dst, int imm8) {
7491   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7492   int encode = prefixq_and_encode(dst->encoding());
7493   if (imm8 == 1) {
7494     emit_int8((unsigned char)0xD1);
7495     emit_int8((unsigned char)(0xD0 | encode));
7496   } else {
7497     emit_int8((unsigned char)0xC1);
7498     emit_int8((unsigned char)(0xD0 | encode));
7499     emit_int8(imm8);
7500   }
7501 }
7502 
7503 void Assembler::rcrq(Register dst, int imm8) {
7504   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7505   int encode = prefixq_and_encode(dst->encoding());
7506   if (imm8 == 1) {
7507     emit_int8((unsigned char)0xD1);
7508     emit_int8((unsigned char)(0xD8 | encode));
7509   } else {
7510     emit_int8((unsigned char)0xC1);
7511     emit_int8((unsigned char)(0xD8 | encode));
7512     emit_int8(imm8);
7513   }
7514 }
7515 
7516 void Assembler::rorq(Register dst, int imm8) {
7517   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7518   int encode = prefixq_and_encode(dst->encoding());
7519   if (imm8 == 1) {
7520     emit_int8((unsigned char)0xD1);
7521     emit_int8((unsigned char)(0xC8 | encode));
7522   } else {
7523     emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xC8 | encode));
7525     emit_int8(imm8);
7526   }
7527 }
7528 
7529 void Assembler::rorxq(Register dst, Register src, int imm8) {
7530   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
7531   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A,
7532                                      /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
7533   emit_int8((unsigned char)0xF0);
7534   emit_int8((unsigned char)(0xC0 | encode));
7535   emit_int8(imm8);
7536 }
7537 
7538 void Assembler::sarq(Register dst, int imm8) {
7539   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7540   int encode = prefixq_and_encode(dst->encoding());
7541   if (imm8 == 1) {
7542     emit_int8((unsigned char)0xD1);
7543     emit_int8((unsigned char)(0xF8 | encode));
7544   } else {
7545     emit_int8((unsigned char)0xC1);
7546     emit_int8((unsigned char)(0xF8 | encode));
7547     emit_int8(imm8);
7548   }
7549 }
7550 
7551 void Assembler::sarq(Register dst) {
7552   int encode = prefixq_and_encode(dst->encoding());
7553   emit_int8((unsigned char)0xD3);
7554   emit_int8((unsigned char)(0xF8 | encode));
7555 }
7556 
7557 void Assembler::sbbq(Address dst, int32_t imm32) {
7558   InstructionMark im(this);
7559   prefixq(dst);
7560   emit_arith_operand(0x81, rbx, dst, imm32);
7561 }
7562 
7563 void Assembler::sbbq(Register dst, int32_t imm32) {
7564   (void) prefixq_and_encode(dst->encoding());
7565   emit_arith(0x81, 0xD8, dst, imm32);
7566 }
7567 
7568 void Assembler::sbbq(Register dst, Address src) {
7569   InstructionMark im(this);
7570   prefixq(src, dst);
7571   emit_int8(0x1B);
7572   emit_operand(dst, src);
7573 }
7574 
7575 void Assembler::sbbq(Register dst, Register src) {
7576   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7577   emit_arith(0x1B, 0xC0, dst, src);
7578 }
7579 
7580 void Assembler::shlq(Register dst, int imm8) {
7581   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7582   int encode = prefixq_and_encode(dst->encoding());
7583   if (imm8 == 1) {
7584     emit_int8((unsigned char)0xD1);
7585     emit_int8((unsigned char)(0xE0 | encode));
7586   } else {
7587     emit_int8((unsigned char)0xC1);
7588     emit_int8((unsigned char)(0xE0 | encode));
7589     emit_int8(imm8);
7590   }
7591 }
7592 
7593 void Assembler::shlq(Register dst) {
7594   int encode = prefixq_and_encode(dst->encoding());
7595   emit_int8((unsigned char)0xD3);
7596   emit_int8((unsigned char)(0xE0 | encode));
7597 }
7598 
7599 void Assembler::shrq(Register dst, int imm8) {
7600   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7601   int encode = prefixq_and_encode(dst->encoding());
7602   emit_int8((unsigned char)0xC1);
7603   emit_int8((unsigned char)(0xE8 | encode));
7604   emit_int8(imm8);
7605 }
7606 
7607 void Assembler::shrq(Register dst) {
7608   int encode = prefixq_and_encode(dst->encoding());
7609   emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
7611 }
7612 
7613 void Assembler::subq(Address dst, int32_t imm32) {
7614   InstructionMark im(this);
7615   prefixq(dst);
7616   emit_arith_operand(0x81, rbp, dst, imm32);
7617 }
7618 
7619 void Assembler::subq(Address dst, Register src) {
7620   InstructionMark im(this);
7621   prefixq(dst, src);
7622   emit_int8(0x29);
7623   emit_operand(src, dst);
7624 }
7625 
7626 void Assembler::subq(Register dst, int32_t imm32) {
7627   (void) prefixq_and_encode(dst->encoding());
7628   emit_arith(0x81, 0xE8, dst, imm32);
7629 }
7630 
7631 // Force generation of a 4 byte immediate value even if it fits into 8bit
7632 void Assembler::subq_imm32(Register dst, int32_t imm32) {
7633   (void) prefixq_and_encode(dst->encoding());
7634   emit_arith_imm32(0x81, 0xE8, dst, imm32);
7635 }
7636 
7637 void Assembler::subq(Register dst, Address src) {
7638   InstructionMark im(this);
7639   prefixq(src, dst);
7640   emit_int8(0x2B);
7641   emit_operand(dst, src);
7642 }
7643 
7644 void Assembler::subq(Register dst, Register src) {
7645   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7646   emit_arith(0x2B, 0xC0, dst, src);
7647 }
7648 
7649 void Assembler::testq(Register dst, int32_t imm32) {
7650   // not using emit_arith because test
7651   // doesn't support sign-extension of
7652   // 8bit operands
7653   int encode = dst->encoding();
7654   if (encode == 0) {
7655     prefix(REX_W);
7656     emit_int8((unsigned char)0xA9);
7657   } else {
7658     encode = prefixq_and_encode(encode);
7659     emit_int8((unsigned char)0xF7);
7660     emit_int8((unsigned char)(0xC0 | encode));
7661   }
7662   emit_int32(imm32);
7663 }
7664 
7665 void Assembler::testq(Register dst, Register src) {
7666   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7667   emit_arith(0x85, 0xC0, dst, src);
7668 }
7669 
7670 void Assembler::xaddq(Address dst, Register src) {
7671   InstructionMark im(this);
7672   prefixq(dst, src);
7673   emit_int8(0x0F);
7674   emit_int8((unsigned char)0xC1);
7675   emit_operand(src, dst);
7676 }
7677 
7678 void Assembler::xchgq(Register dst, Address src) {
7679   InstructionMark im(this);
7680   prefixq(src, dst);
7681   emit_int8((unsigned char)0x87);
7682   emit_operand(dst, src);
7683 }
7684 
7685 void Assembler::xchgq(Register dst, Register src) {
7686   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7687   emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
7689 }
7690 
7691 void Assembler::xorq(Register dst, Register src) {
7692   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7693   emit_arith(0x33, 0xC0, dst, src);
7694 }
7695 
7696 void Assembler::xorq(Register dst, Address src) {
7697   InstructionMark im(this);
7698   prefixq(src, dst);
7699   emit_int8(0x33);
7700   emit_operand(dst, src);
7701 }
7702 
7703 #endif // !LP64