/*
 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

// A 2-D table for managing compressed displacement (disp8) on EVEX-enabled platforms.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
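
// A worked example of the disp8*N compression this table drives (an
// illustrative sketch, not part of the table itself): for tuple EVEX_T1S
// with a 64-bit input, row EVEX_T1S(3) gives N == 8 at every vector length,
// so a displacement of 64 can be emitted as the compressed disp8 value
// 64/8 = 8, while a displacement of 60 is not a multiple of 8 and must fall
// back to a full disp32.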

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}
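
// For example (illustrative only): rax->encoding() == 0 passes through
// unchanged, while on 64-bit r9->encoding() == 9 is reduced to 1 here; the
// dropped high bit travels in the REX.B/REX.X/REX.R prefix bits instead, so
// the 3-bit ModRM and SIB fields always receive a value in the range [0, 7].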

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_int32(imm32);
  }
}
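
// Byte-level sketch of the two paths above (encodings per the Intel SDM):
//   addl(rbx, 5)        emits 83 C3 05           -- op1|0x02, then a
//                       sign-extended imm8
//   addl(rbx, 0x12345)  emits 81 C3 45 23 01 00  -- full imm32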

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int8(op1);
  emit_int8(op2 | encode(dst) << 3 | encode(src));
}


bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && is_evex_inst) {
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        mod_idx = 2 + (((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0); // EVEX_FV(2) or EVEX_FV(3)
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if ((-0x80 <= new_disp && new_disp < 0x80)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return (-0x80 <= disp && disp < 0x80);
}
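
// Worked example of the logic above (a sketch): for tuple EVEX_FV with
// EVEX.w set and EVEX.b clear, mod_idx becomes 2, so at AVX_512bit the
// factor is tuple_table[EVEX_FV + 2][AVX_512bit] == 64; a displacement of
// 128 then compresses to 128/64 == 2, which fits in a signed byte, while a
// displacement of 100 is rejected outright because it is not a multiple of 64.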


bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && _is_evex_instruction) {
    switch (_tuple_type) {
    case EVEX_FV:
      if ((_evex_encoding & VEX_W) == VEX_W) {
        mod_idx = 2 + (((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0); // EVEX_FV(2) or EVEX_FV(3)
      } else {
        mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (_input_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_input_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (_avx_vector_len >= AVX_128bit && _avx_vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[_tuple_type + mod_idx][_avx_vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return is8bit(disp);
}


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
  _is_evex_instruction = false;
}
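
// Worked example of the ModRM/SIB encodings above (illustrative): with
// reg == rcx, the operand [rax + rbx*4 + 8] is emitted as
//   [01 001 100][10 011 000] 08   i.e. bytes 4C 98 08
// mod == 01 selects a disp8, r/m == 100 requests a SIB byte, and the SIB
// byte packs scale == 10 (times 4), index == 011 (rbx), base == 000 (rax).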

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  if (UseAVX > 2) {
    int xreg_enc = reg->encoding();
    if (xreg_enc > 15) {
      XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
      emit_operand((Register)new_reg, base, index, scale, disp, rspec);
      return;
    }
  }
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x59: // mulpd
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions,
    // but those have prefix 0x0F and are processed when 0x0F is handled above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    int vex_opcode;
    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      vex_opcode = VEX_OPCODE_MASK & *ip;
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    } else {
      vex_opcode = VEX_OPCODE_0F;
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (vex_opcode) {
      case VEX_OPCODE_0F:
        switch (0xFF & *ip) {
        case 0x70: // pshufd r, r/a, #8
        case 0x71: // ps[rl|ra|ll]w r, #8
        case 0x72: // ps[rl|ra|ll]d r, #8
        case 0x73: // ps[rl|ra|ll]q r, #8
        case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
        case 0xC4: // pinsrw r, r, r/a, #8
        case 0xC5: // pextrw r/a, r, #8
        case 0xC6: // shufp[s|d] r, r, r/a, #8
          tail_size = 1;  // the imm8
          break;
        }
        break;
      case VEX_OPCODE_0F_3A:
        tail_size = 1;
        break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert((UseAVX > 0), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions, all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}
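
// Usage sketch: a caller such as fadd(i) elsewhere in this file invokes
// emit_farith(0xD8, 0xC0, i), producing the two bytes D8 C0+i, i.e.
// "fadd st(0), st(i)" -- the x87 stack slot is simply added into the
// second opcode byte, which is why i must stay in [0, 8).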


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}
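
// Usage sketch (an assumption about a typical caller, not code from this
// file): when padding a loop header to a 16-byte boundary, a code generator
// can cover 12 bytes with just addr_nop_8() followed by addr_nop_4(),
// rather than twelve single-byte 0x90 NOPs, which front-ends decode more
// cheaply.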

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
  }
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_64bit;
    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
  }
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_32bit;
  }
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDE);
  emit_int8(0xC0 | encode);
}

void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}

void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}

void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDC);
  emit_int8(0xC0 | encode);
}

void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}

void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rsp, dst, 4);
  emit_int32(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andnl(Register dst, Register src1, Address src2) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}

void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsil(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}

void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsmskl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}

void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsrl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);
  }
}
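
// Usage sketch (MacroAssembler-style, illustrative): a forward call emits
// E8 00 00 00 00 and records a patch site that is fixed up when the label
// is bound; a backward call computes the displacement immediately:
//   Label L;
//   __ call(L, relocInfo::none);  // forward: patched at bind(L)
//   __ bind(L);
//   __ call(L, relocInfo::none);  // backward: disp known now (offs - 5)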

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_int8((unsigned char)0x99);
}

void Assembler::cld() {
  emit_int8((unsigned char)0xFC);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rdi, dst, 1);
  emit_int8(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_int32(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_int8(0x66);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax;
// if they are equal, reg is stored into adr, otherwise the value at adr is
// loaded into rax. The ZF is set if the compared values were equal, and
// cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}

// The 8-bit cmpxchg compares the value at adr with the contents of rax;
// if they are equal, reg is stored into adr, otherwise the value at adr is
// loaded into rax. The ZF is set if the compared values were equal, and
// cleared otherwise.
void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg, true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB0);
  emit_operand(reg, adr);
}
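
// Usage sketch (assuming a MacroAssembler-style caller): an atomic
// compare-and-swap loop emits the lock prefix first, e.g.
//   __ lock();
//   __ cmpxchgl(new_val, Address(obj, offset));
// with the expected old value preloaded into rax; on failure rax holds the
// value observed in memory, so the loop can retry without reloading.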

void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely, ucomisd comes out correct.
1584   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1585   if (VM_Version::supports_evex()) {
1586     _tuple_type = EVEX_T1S;
1587     _input_size_in_bits = EVEX_64bit;
1588     emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
1589   } else {
1590     emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1591   }
1592 }
1593 
1594 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1595   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1596   if (VM_Version::supports_evex()) {
1597     emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
1598   } else {
1599     emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1600   }
1601 }
1602 
1603 void Assembler::comiss(XMMRegister dst, Address src) {
1604   if (VM_Version::supports_evex()) {
1605     _tuple_type = EVEX_T1S;
1606     _input_size_in_bits = EVEX_32bit;
1607   }
1608   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1609   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
1610 }
1611 
1612 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1613   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1614   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
1615 }
1616 
1617 void Assembler::cpuid() {
1618   emit_int8(0x0F);
1619   emit_int8((unsigned char)0xA2);
1620 }
1621 
// Opcode / Instruction                     Op/En  64-Bit Mode  Compat/Leg Mode  Description                  Implemented
// F2 0F 38 F0 /r        CRC32 r32, r/m8    RM     Valid        Valid            Accumulate CRC32 on r/m8.    v
// F2 REX 0F 38 F0 /r    CRC32 r32, r/m8*   RM     Valid        N.E.             Accumulate CRC32 on r/m8.    -
// F2 REX.W 0F 38 F0 /r  CRC32 r64, r/m8    RM     Valid        N.E.             Accumulate CRC32 on r/m8.    -
//
// F2 0F 38 F1 /r        CRC32 r32, r/m16   RM     Valid        Valid            Accumulate CRC32 on r/m16.   v
//
// F2 0F 38 F1 /r        CRC32 r32, r/m32   RM     Valid        Valid            Accumulate CRC32 on r/m32.   v
//
// F2 REX.W 0F 38 F1 /r  CRC32 r64, r/m64   RM     Valid        N.E.             Accumulate CRC32 on r/m64.   v
1632 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1633   assert(VM_Version::supports_sse4_2(), "");
1634   int8_t w = 0x01;
1635   Prefix p = Prefix_EMPTY;
1636 
1637   emit_int8((int8_t)0xF2);
1638   switch (sizeInBytes) {
1639   case 1:
1640     w = 0;
1641     break;
1642   case 2:
1643   case 4:
1644     break;
1645   LP64_ONLY(case 8:)
    // This instruction is not valid in 32-bit mode
    // Note:
    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
    //
    // Page B-72, Vol. 2C says
    // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
    // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r/m
    //                                                                            F0!!!
    // while page 3-208, Vol. 2A has
    // F2 REX.W 0F 38 F1 /r       CRC32 r64, r/m64               RM         Valid      N.E.   Accumulate CRC32 on r/m64.
    //
    // i.e. the 0 in the last bit is reserved for a different flavor of this instruction:
    // F2 REX.W 0F 38 F0 /r       CRC32 r64, r/m8                RM         Valid      N.E.   Accumulate CRC32 on r/m8.
1659     p = REX_W;
1660     break;
1661   default:
1662     assert(0, "Unsupported value for a sizeInBytes argument");
1663     break;
1664   }
1665   LP64_ONLY(prefix(crc, v, p);)
1666   emit_int8((int8_t)0x0F);
1667   emit_int8(0x38);
1668   emit_int8((int8_t)(0xF0 | w));
1669   emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
1670 }
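
// Worked example (illustrative): crc32(rax, rdx, 1) takes the
// sizeInBytes == 1 path (w = 0, and no REX prefix is needed for these low
// registers), so the bytes emitted are F2 0F 38 F0 C2, i.e. "crc32 eax, dl":
// the F2 0F 38 F0 /r row of the table above with ModRM = C0 | (0 << 3) | 2.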
1671 
1672 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1673   assert(VM_Version::supports_sse4_2(), "");
1674   InstructionMark im(this);
1675   int8_t w = 0x01;
1676   Prefix p = Prefix_EMPTY;
1677 
1678   emit_int8((int8_t)0xF2);
1679   switch (sizeInBytes) {
1680   case 1:
1681     w = 0;
1682     break;
1683   case 2:
1684   case 4:
1685     break;
1686   LP64_ONLY(case 8:)
    // This instruction is not valid in 32-bit mode
1688     p = REX_W;
1689     break;
1690   default:
1691     assert(0, "Unsupported value for a sizeInBytes argument");
1692     break;
1693   }
1694   LP64_ONLY(prefix(crc, adr, p);)
1695   emit_int8((int8_t)0x0F);
1696   emit_int8(0x38);
1697   emit_int8((int8_t)(0xF0 | w));
1698   emit_operand(crc, adr);
1699 }
1700 
1701 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1702   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1703   emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true);
1704 }
1705 
1706 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1707   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1708   emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ true);
1709 }
1710 
1711 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1712   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1713   if (VM_Version::supports_evex()) {
1714     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1715   } else {
1716     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1717   }
1718 }
1719 
1720 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1721   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1722   if (VM_Version::supports_evex()) {
1723     _tuple_type = EVEX_T1F;
1724     _input_size_in_bits = EVEX_64bit;
1725     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1726   } else {
1727     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1728   }
1729 }
1730 
1731 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1732   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1733   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VM_Version::supports_evex());
1734   emit_int8(0x2A);
1735   emit_int8((unsigned char)(0xC0 | encode));
1736 }
1737 
1738 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1739   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1740   if (VM_Version::supports_evex()) {
1741     _tuple_type = EVEX_T1S;
1742     _input_size_in_bits = EVEX_32bit;
1743     emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
1744   } else {
1745     emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
1746   }
1747 }
1748 
1749 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1750   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1751   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
1752   emit_int8(0x2A);
1753   emit_int8((unsigned char)(0xC0 | encode));
1754 }
1755 
1756 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1757   if (VM_Version::supports_evex()) {
1758     _tuple_type = EVEX_T1S;
1759     _input_size_in_bits = EVEX_32bit;
1760   }
1761   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1762   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
1763 }
1764 
1765 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1766   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1767   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
1768   emit_int8(0x2A);
1769   emit_int8((unsigned char)(0xC0 | encode));
1770 }
1771 
1772 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1773   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1774   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1775 }
1776 
1777 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1778   if (VM_Version::supports_evex()) {
1779     _tuple_type = EVEX_T1S;
1780     _input_size_in_bits = EVEX_32bit;
1781   }
1782   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1783   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1784 }
1785 
1786 
1787 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1788   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1789   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
1790   emit_int8(0x2C);
1791   emit_int8((unsigned char)(0xC0 | encode));
1792 }
1793 
1794 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1795   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1796   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
1797   emit_int8(0x2C);
1798   emit_int8((unsigned char)(0xC0 | encode));
1799 }
1800 
1801 void Assembler::decl(Address dst) {
1802   // Don't use it directly. Use MacroAssembler::decrement() instead.
1803   InstructionMark im(this);
1804   prefix(dst);
1805   emit_int8((unsigned char)0xFF);
1806   emit_operand(rcx, dst);
1807 }
1808 
1809 void Assembler::divsd(XMMRegister dst, Address src) {
1810   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1811   if (VM_Version::supports_evex()) {
1812     _tuple_type = EVEX_T1S;
1813     _input_size_in_bits = EVEX_64bit;
1814     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1815   } else {
1816     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1817   }
1818 }
1819 
1820 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1821   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1822   if (VM_Version::supports_evex()) {
1823     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1824   } else {
1825     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1826   }
1827 }
1828 
1829 void Assembler::divss(XMMRegister dst, Address src) {
1830   if (VM_Version::supports_evex()) {
1831     _tuple_type = EVEX_T1S;
1832     _input_size_in_bits = EVEX_32bit;
1833   }
1834   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1835   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1836 }
1837 
1838 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1839   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1840   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1841 }
1842 
1843 void Assembler::emms() {
1844   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1845   emit_int8(0x0F);
1846   emit_int8(0x77);
1847 }
1848 
1849 void Assembler::hlt() {
1850   emit_int8((unsigned char)0xF4);
1851 }
1852 
1853 void Assembler::idivl(Register src) {
1854   int encode = prefix_and_encode(src->encoding());
1855   emit_int8((unsigned char)0xF7);
1856   emit_int8((unsigned char)(0xF8 | encode));
1857 }
1858 
1859 void Assembler::divl(Register src) { // Unsigned
1860   int encode = prefix_and_encode(src->encoding());
1861   emit_int8((unsigned char)0xF7);
1862   emit_int8((unsigned char)(0xF0 | encode));
1863 }
1864 
1865 void Assembler::imull(Register dst, Register src) {
1866   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1867   emit_int8(0x0F);
1868   emit_int8((unsigned char)0xAF);
1869   emit_int8((unsigned char)(0xC0 | encode));
1870 }
1871 
1872 
1873 void Assembler::imull(Register dst, Register src, int value) {
1874   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1875   if (is8bit(value)) {
1876     emit_int8(0x6B);
1877     emit_int8((unsigned char)(0xC0 | encode));
1878     emit_int8(value & 0xFF);
1879   } else {
1880     emit_int8(0x69);
1881     emit_int8((unsigned char)(0xC0 | encode));
1882     emit_int32(value);
1883   }
1884 }
1885 
1886 void Assembler::imull(Register dst, Address src) {
1887   InstructionMark im(this);
1888   prefix(src, dst);
1889   emit_int8(0x0F);
1890   emit_int8((unsigned char) 0xAF);
1891   emit_operand(dst, src);
1892 }
1893 
1894 
1895 void Assembler::incl(Address dst) {
1896   // Don't use it directly. Use MacroAssembler::increment() instead.
1897   InstructionMark im(this);
1898   prefix(dst);
1899   emit_int8((unsigned char)0xFF);
1900   emit_operand(rax, dst);
1901 }
1902 
1903 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1904   InstructionMark im(this);
1905   assert((0 <= cc) && (cc < 16), "illegal cc");
1906   if (L.is_bound()) {
1907     address dst = target(L);
1908     assert(dst != NULL, "jcc most probably wrong");
1909 
1910     const int short_size = 2;
1911     const int long_size = 6;
1912     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
1913     if (maybe_short && is8bit(offs - short_size)) {
1914       // 0111 tttn #8-bit disp
1915       emit_int8(0x70 | cc);
1916       emit_int8((offs - short_size) & 0xFF);
1917     } else {
1918       // 0000 1111 1000 tttn #32-bit disp
1919       assert(is_simm32(offs - long_size),
1920              "must be 32bit offset (call4)");
1921       emit_int8(0x0F);
1922       emit_int8((unsigned char)(0x80 | cc));
1923       emit_int32(offs - long_size);
1924     }
1925   } else {
    // Note: we could eliminate conditional jumps to this jump if the
    //       condition is the same; however, that seems to be a rather unlikely case.
    // Note: use jccb() if the label to be bound is very close, to get
    //       an 8-bit displacement.
1930     L.add_patch_at(code(), locator());
1931     emit_int8(0x0F);
1932     emit_int8((unsigned char)(0x80 | cc));
1933     emit_int32(0);
1934   }
1935 }
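
// Worked example (illustrative): for a label already bound 32 bytes ahead
// of the current pc, offs == 32 and offs - short_size fits in 8 bits, so
// jcc(equal, L) takes the short form and emits the two bytes 74 1E
// (0x70 | 0x4, then 32 - 2 == 0x1E).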
1936 
1937 void Assembler::jccb(Condition cc, Label& L) {
1938   if (L.is_bound()) {
1939     const int short_size = 2;
1940     address entry = target(L);
1941 #ifdef ASSERT
1942     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1943     intptr_t delta = short_branch_delta();
1944     if (delta != 0) {
      dist += (dist < 0 ? -delta : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
1948 #endif
1949     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
1950     // 0111 tttn #8-bit disp
1951     emit_int8(0x70 | cc);
1952     emit_int8((offs - short_size) & 0xFF);
1953   } else {
1954     InstructionMark im(this);
1955     L.add_patch_at(code(), locator());
1956     emit_int8(0x70 | cc);
1957     emit_int8(0);
1958   }
1959 }
1960 
1961 void Assembler::jmp(Address adr) {
1962   InstructionMark im(this);
1963   prefix(adr);
1964   emit_int8((unsigned char)0xFF);
1965   emit_operand(rsp, adr);
1966 }
1967 
1968 void Assembler::jmp(Label& L, bool maybe_short) {
1969   if (L.is_bound()) {
1970     address entry = target(L);
1971     assert(entry != NULL, "jmp most probably wrong");
1972     InstructionMark im(this);
1973     const int short_size = 2;
1974     const int long_size = 5;
1975     intptr_t offs = entry - pc();
1976     if (maybe_short && is8bit(offs - short_size)) {
1977       emit_int8((unsigned char)0xEB);
1978       emit_int8((offs - short_size) & 0xFF);
1979     } else {
1980       emit_int8((unsigned char)0xE9);
1981       emit_int32(offs - long_size);
1982     }
1983   } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will stay within the 8-bit signed range (at most
    // 127 bytes forward), use jmpb to force an 8-bit displacement.
1988     InstructionMark im(this);
1989     L.add_patch_at(code(), locator());
1990     emit_int8((unsigned char)0xE9);
1991     emit_int32(0);
1992   }
1993 }
1994 
1995 void Assembler::jmp(Register entry) {
1996   int encode = prefix_and_encode(entry->encoding());
1997   emit_int8((unsigned char)0xFF);
1998   emit_int8((unsigned char)(0xE0 | encode));
1999 }
2000 
2001 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2002   InstructionMark im(this);
2003   emit_int8((unsigned char)0xE9);
2004   assert(dest != NULL, "must have a target");
2005   intptr_t disp = dest - (pc() + sizeof(int32_t));
2006   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2007   emit_data(disp, rspec.reloc(), call32_operand);
2008 }
2009 
2010 void Assembler::jmpb(Label& L) {
2011   if (L.is_bound()) {
2012     const int short_size = 2;
2013     address entry = target(L);
2014     assert(entry != NULL, "jmp most probably wrong");
2015 #ifdef ASSERT
2016     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2017     intptr_t delta = short_branch_delta();
2018     if (delta != 0) {
      dist += (dist < 0 ? -delta : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
2022 #endif
2023     intptr_t offs = entry - pc();
2024     emit_int8((unsigned char)0xEB);
2025     emit_int8((offs - short_size) & 0xFF);
2026   } else {
2027     InstructionMark im(this);
2028     L.add_patch_at(code(), locator());
2029     emit_int8((unsigned char)0xEB);
2030     emit_int8(0);
2031   }
2032 }
2033 
void Assembler::ldmxcsr(Address src) {
2035   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2036   InstructionMark im(this);
2037   prefix(src);
2038   emit_int8(0x0F);
2039   emit_int8((unsigned char)0xAE);
2040   emit_operand(as_Register(2), src);
2041 }
2042 
2043 void Assembler::leal(Register dst, Address src) {
2044   InstructionMark im(this);
2045 #ifdef _LP64
2046   emit_int8(0x67); // addr32
2047   prefix(src, dst);
2048 #endif // LP64
2049   emit_int8((unsigned char)0x8D);
2050   emit_operand(dst, src);
2051 }
2052 
2053 void Assembler::lfence() {
2054   emit_int8(0x0F);
2055   emit_int8((unsigned char)0xAE);
2056   emit_int8((unsigned char)0xE8);
2057 }
2058 
2059 void Assembler::lock() {
2060   emit_int8((unsigned char)0xF0);
2061 }
2062 
2063 void Assembler::lzcntl(Register dst, Register src) {
2064   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2065   emit_int8((unsigned char)0xF3);
2066   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2067   emit_int8(0x0F);
2068   emit_int8((unsigned char)0xBD);
2069   emit_int8((unsigned char)(0xC0 | encode));
2070 }
2071 
2072 // Emit mfence instruction
2073 void Assembler::mfence() {
2074   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2075   emit_int8(0x0F);
2076   emit_int8((unsigned char)0xAE);
2077   emit_int8((unsigned char)0xF0);
2078 }
2079 
2080 void Assembler::mov(Register dst, Register src) {
2081   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2082 }
2083 
2084 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2085   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2086   if (VM_Version::supports_avx512novl()) {
2087     int vector_len = AVX_512bit;
2088     int dst_enc = dst->encoding();
2089     int src_enc = src->encoding();
2090     int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F,
2091                                        /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
2092     emit_int8(0x28);
2093     emit_int8((unsigned char)(0xC0 | encode));
2094   } else if (VM_Version::supports_evex()) {
2095     emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66);
2096   } else {
2097     emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
2098   }
2099 }
2100 
2101 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2102   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2103   if (VM_Version::supports_avx512novl()) {
2104     int vector_len = AVX_512bit;
2105     int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, vector_len);
2106     emit_int8(0x28);
2107     emit_int8((unsigned char)(0xC0 | encode));
2108   } else {
2109     emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
2110   }
2111 }
2112 
2113 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2114   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2115   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
2116   emit_int8(0x16);
2117   emit_int8((unsigned char)(0xC0 | encode));
2118 }
2119 
2120 void Assembler::movb(Register dst, Address src) {
2121   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2122   InstructionMark im(this);
2123   prefix(src, dst, true);
2124   emit_int8((unsigned char)0x8A);
2125   emit_operand(dst, src);
2126 }
2127 
2128 void Assembler::kmovql(KRegister dst, KRegister src) {
2129   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2130   int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE,
2131                                       /* no_mask_reg */ true, VEX_OPCODE_0F, /* rex_w */ true);
2132   emit_int8((unsigned char)0x90);
2133   emit_int8((unsigned char)(0xC0 | encode));
2134 }
2135 
2136 void Assembler::kmovql(KRegister dst, Address src) {
2137   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2138   int dst_enc = dst->encoding();
2139   int nds_enc = 0;
2140   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE,
2141              VEX_OPCODE_0F, /* vex_w */  true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
2142   emit_int8((unsigned char)0x90);
2143   emit_operand((Register)dst, src);
2144 }
2145 
2146 void Assembler::kmovql(Address dst, KRegister src) {
2147   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2148   int src_enc = src->encoding();
2149   int nds_enc = 0;
2150   vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE,
2151              VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
2152   emit_int8((unsigned char)0x90);
2153   emit_operand((Register)src, dst);
2154 }
2155 
2156 void Assembler::kmovql(KRegister dst, Register src) {
2157   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2158   VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
2159   int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true,
2160                                       VEX_OPCODE_0F, /* legacy_mode */ !_legacy_mode_bw);
2161   emit_int8((unsigned char)0x92);
2162   emit_int8((unsigned char)(0xC0 | encode));
2163 }
2164 
2165 void Assembler::kmovdl(KRegister dst, Register src) {
2166   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2167   VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
2168   int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true);
2169   emit_int8((unsigned char)0x92);
2170   emit_int8((unsigned char)(0xC0 | encode));
2171 }
2172 
2173 void Assembler::kmovwl(KRegister dst, Register src) {
2174   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2175   int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
2176   emit_int8((unsigned char)0x92);
2177   emit_int8((unsigned char)(0xC0 | encode));
2178 }
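
// The kmov* forms above move opmask (k) register contents: opcode 0x90 is
// the k <- k (or memory) form and 0x92 the k <- GPR form, with the SIMD
// prefix and the W bit together selecting the 16/32/64-bit mask width
// implied by the w/d/q suffix.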
2179 
2180 void Assembler::movb(Address dst, int imm8) {
2181   InstructionMark im(this);
  prefix(dst);
2183   emit_int8((unsigned char)0xC6);
2184   emit_operand(rax, dst, 1);
2185   emit_int8(imm8);
2186 }
2187 
2188 
2189 void Assembler::movb(Address dst, Register src) {
2190   assert(src->has_byte_register(), "must have byte register");
2191   InstructionMark im(this);
2192   prefix(dst, src, true);
2193   emit_int8((unsigned char)0x88);
2194   emit_operand(src, dst);
2195 }
2196 
2197 void Assembler::movdl(XMMRegister dst, Register src) {
2198   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2199   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2200   emit_int8(0x6E);
2201   emit_int8((unsigned char)(0xC0 | encode));
2202 }
2203 
2204 void Assembler::movdl(Register dst, XMMRegister src) {
2205   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2206   // swap src/dst to get correct prefix
2207   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
2208   emit_int8(0x7E);
2209   emit_int8((unsigned char)(0xC0 | encode));
2210 }
2211 
2212 void Assembler::movdl(XMMRegister dst, Address src) {
2213   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2214   if (VM_Version::supports_evex()) {
2215     _tuple_type = EVEX_T1S;
2216     _input_size_in_bits = EVEX_32bit;
2217   }
2218   InstructionMark im(this);
2219   simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
2220   emit_int8(0x6E);
2221   emit_operand(dst, src);
2222 }
2223 
2224 void Assembler::movdl(Address dst, XMMRegister src) {
2225   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2226   if (VM_Version::supports_evex()) {
2227     _tuple_type = EVEX_T1S;
2228     _input_size_in_bits = EVEX_32bit;
2229   }
2230   InstructionMark im(this);
2231   simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
2232   emit_int8(0x7E);
2233   emit_operand(src, dst);
2234 }
2235 
2236 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2237   _instruction_uses_vl = true;
2238   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2239   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2240 }
2241 
2242 void Assembler::movdqa(XMMRegister dst, Address src) {
2243   _instruction_uses_vl = true;
2244   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2245   if (VM_Version::supports_evex()) {
2246     _tuple_type = EVEX_FVM;
2247   }
2248   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2249 }
2250 
2251 void Assembler::movdqu(XMMRegister dst, Address src) {
2252   _instruction_uses_vl = true;
2253   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2254   if (VM_Version::supports_evex()) {
2255     _tuple_type = EVEX_FVM;
2256   }
2257   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2258 }
2259 
2260 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2261   _instruction_uses_vl = true;
2262   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2263   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2264 }
2265 
2266 void Assembler::movdqu(Address dst, XMMRegister src) {
2267   _instruction_uses_vl = true;
2268   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2269   if (VM_Version::supports_evex()) {
2270     _tuple_type = EVEX_FVM;
2271   }
2272   InstructionMark im(this);
2273   simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
2274   emit_int8(0x7F);
2275   emit_operand(src, dst);
2276 }
2277 
2278 // Move Unaligned 256bit Vector
2279 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2280   _instruction_uses_vl = true;
2281   assert(UseAVX > 0, "");
2282   int vector_len = AVX_256bit;
2283   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2284   emit_int8(0x6F);
2285   emit_int8((unsigned char)(0xC0 | encode));
2286 }
2287 
2288 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2289   _instruction_uses_vl = true;
2290   assert(UseAVX > 0, "");
2291   if (VM_Version::supports_evex()) {
2292     _tuple_type = EVEX_FVM;
2293   }
2294   InstructionMark im(this);
2295   int vector_len = AVX_256bit;
2296   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2297   emit_int8(0x6F);
2298   emit_operand(dst, src);
2299 }
2300 
2301 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2302   _instruction_uses_vl = true;
2303   assert(UseAVX > 0, "");
2304   if (VM_Version::supports_evex()) {
2305     _tuple_type = EVEX_FVM;
2306   }
2307   InstructionMark im(this);
2308   int vector_len = AVX_256bit;
2309   // swap src<->dst for encoding
2310   assert(src != xnoreg, "sanity");
2311   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
2312   emit_int8(0x7F);
2313   emit_operand(src, dst);
2314 }
2315 
// Move Unaligned EVEX-enabled Vector (element size programmable: 8, 16, 32, or 64 bits)
2317 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2318   _instruction_uses_vl = true;
2319   assert(UseAVX > 0, "");
2320   int src_enc = src->encoding();
2321   int dst_enc = dst->encoding();
2322   int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
2323                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
2324   emit_int8(0x6F);
2325   emit_int8((unsigned char)(0xC0 | encode));
2326 }
2327 
2328 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2329   _instruction_uses_vl = true;
2330   assert(UseAVX > 0, "");
2331   InstructionMark im(this);
2332   if (VM_Version::supports_evex()) {
2333     _tuple_type = EVEX_FVM;
2334   }
2335   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2336   emit_int8(0x6F);
2337   emit_operand(dst, src);
2338 }
2339 
2340 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2341   _instruction_uses_vl = true;
2342   assert(UseAVX > 0, "");
2343   InstructionMark im(this);
2344   assert(src != xnoreg, "sanity");
2345   if (VM_Version::supports_evex()) {
2346     _tuple_type = EVEX_FVM;
2347   }
2348   // swap src<->dst for encoding
2349   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
2350   emit_int8(0x7F);
2351   emit_operand(src, dst);
2352 }
2353 
2354 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2355   _instruction_uses_vl = true;
2356   assert(UseAVX > 0, "");
2357   int src_enc = src->encoding();
2358   int dst_enc = dst->encoding();
2359   int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
2360                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
2361   emit_int8(0x6F);
2362   emit_int8((unsigned char)(0xC0 | encode));
2363 }
2364 
2365 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2366   _instruction_uses_vl = true;
2367   assert(UseAVX > 2, "");
2368   InstructionMark im(this);
2369   _tuple_type = EVEX_FVM;
2370   vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2371   emit_int8(0x6F);
2372   emit_operand(dst, src);
2373 }
2374 
2375 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2376   _instruction_uses_vl = true;
2377   assert(UseAVX > 2, "");
2378   InstructionMark im(this);
2379   assert(src != xnoreg, "sanity");
2380   _tuple_type = EVEX_FVM;
2381   // swap src<->dst for encoding
2382   vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
2383   emit_int8(0x7F);
2384   emit_operand(src, dst);
2385 }
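
// Note: the evmovdqul/evmovdquq pairs above differ only in the VEX.W bit
// (/* vex_w */ false vs. true), i.e. doubleword vs. quadword element
// granularity; the distinction matters mainly once EVEX write-masking is
// applied to the move.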
2386 
// Uses zero extension on 64-bit: writing a 32-bit register implicitly
// clears the upper 32 bits of the corresponding 64-bit register.
2388 
2389 void Assembler::movl(Register dst, int32_t imm32) {
2390   int encode = prefix_and_encode(dst->encoding());
2391   emit_int8((unsigned char)(0xB8 | encode));
2392   emit_int32(imm32);
2393 }
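
// For example (illustrative): movl(rax, 0x12345678) emits B8 78 56 34 12;
// on 64-bit the 32-bit write also clears bits 63:32 of rax, which is the
// zero extension noted above.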
2394 
2395 void Assembler::movl(Register dst, Register src) {
2396   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2397   emit_int8((unsigned char)0x8B);
2398   emit_int8((unsigned char)(0xC0 | encode));
2399 }
2400 
2401 void Assembler::movl(Register dst, Address src) {
2402   InstructionMark im(this);
2403   prefix(src, dst);
2404   emit_int8((unsigned char)0x8B);
2405   emit_operand(dst, src);
2406 }
2407 
2408 void Assembler::movl(Address dst, int32_t imm32) {
2409   InstructionMark im(this);
2410   prefix(dst);
2411   emit_int8((unsigned char)0xC7);
2412   emit_operand(rax, dst, 4);
2413   emit_int32(imm32);
2414 }
2415 
2416 void Assembler::movl(Address dst, Register src) {
2417   InstructionMark im(this);
2418   prefix(dst, src);
2419   emit_int8((unsigned char)0x89);
2420   emit_operand(src, dst);
2421 }
2422 
// Newer CPUs require the use of movsd and movss to avoid a partial register stall
// when loading from memory. But for old Opterons use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
2426 void Assembler::movlpd(XMMRegister dst, Address src) {
2427   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2428   if (VM_Version::supports_evex()) {
2429     _tuple_type = EVEX_T1S;
2430     _input_size_in_bits = EVEX_32bit;
2431     emit_simd_arith_q(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2432   } else {
2433     emit_simd_arith(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2434   }
2435 }
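
// A sketch of the selection mentioned above (illustrative only; see
// MacroAssembler::movdbl() for the real logic and flag):
//   if (UseXmmLoadAndClearUpper) { movsd(dst, src);  } // newer CPUs
//   else                         { movlpd(dst, src); } // old Opteron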
2436 
void Assembler::movq(MMXRegister dst, Address src) {
  assert(VM_Version::supports_mmx(), "");
2439   emit_int8(0x0F);
2440   emit_int8(0x6F);
2441   emit_operand(dst, src);
2442 }
2443 
void Assembler::movq(Address dst, MMXRegister src) {
  assert(VM_Version::supports_mmx(), "");
2446   emit_int8(0x0F);
2447   emit_int8(0x7F);
  // workaround for a gcc (3.2.1-7a) bug:
  // in that version of gcc, with only an emit_operand(MMX, Address)
  // available, gcc will tail jump and try to reverse the parameters,
  // completely obliterating dst in the process. By having a version
  // available that doesn't need to swap the args at the tail jump, the
  // bug is avoided.
2454   emit_operand(dst, src);
2455 }
2456 
2457 void Assembler::movq(XMMRegister dst, Address src) {
2458   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2459   InstructionMark im(this);
2460   if (VM_Version::supports_evex()) {
2461     _tuple_type = EVEX_T1S;
2462     _input_size_in_bits = EVEX_64bit;
2463     simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2464   } else {
2465     simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2466   }
2467   emit_int8(0x7E);
2468   emit_operand(dst, src);
2469 }
2470 
2471 void Assembler::movq(Address dst, XMMRegister src) {
2472   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2473   InstructionMark im(this);
2474   if (VM_Version::supports_evex()) {
2475     _tuple_type = EVEX_T1S;
2476     _input_size_in_bits = EVEX_64bit;
2477     simd_prefix(src, xnoreg, dst, VEX_SIMD_66, /* no_mask_reg */ true,
2478                 VEX_OPCODE_0F, /* rex_w */ true);
2479   } else {
2480     simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
2481   }
2482   emit_int8((unsigned char)0xD6);
2483   emit_operand(src, dst);
2484 }
2485 
2486 void Assembler::movsbl(Register dst, Address src) { // movsxb
2487   InstructionMark im(this);
2488   prefix(src, dst);
2489   emit_int8(0x0F);
2490   emit_int8((unsigned char)0xBE);
2491   emit_operand(dst, src);
2492 }
2493 
2494 void Assembler::movsbl(Register dst, Register src) { // movsxb
2495   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2496   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2497   emit_int8(0x0F);
2498   emit_int8((unsigned char)0xBE);
2499   emit_int8((unsigned char)(0xC0 | encode));
2500 }
2501 
2502 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2503   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2504   if (VM_Version::supports_evex()) {
2505     emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
2506   } else {
2507     emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
2508   }
2509 }
2510 
2511 void Assembler::movsd(XMMRegister dst, Address src) {
2512   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2513   if (VM_Version::supports_evex()) {
2514     _tuple_type = EVEX_T1S;
2515     _input_size_in_bits = EVEX_64bit;
2516     emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
2517   } else {
2518     emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
2519   }
2520 }
2521 
2522 void Assembler::movsd(Address dst, XMMRegister src) {
2523   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2524   InstructionMark im(this);
2525   if (VM_Version::supports_evex()) {
2526     _tuple_type = EVEX_T1S;
2527     _input_size_in_bits = EVEX_64bit;
2528     simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2);
2529   } else {
2530     simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, /* no_mask_reg */ false);
2531   }
2532   emit_int8(0x11);
2533   emit_operand(src, dst);
2534 }
2535 
2536 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2537   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2538   emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2539 }
2540 
2541 void Assembler::movss(XMMRegister dst, Address src) {
2542   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2543   if (VM_Version::supports_evex()) {
2544     _tuple_type = EVEX_T1S;
2545     _input_size_in_bits = EVEX_32bit;
2546   }
2547   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
2548 }
2549 
2550 void Assembler::movss(Address dst, XMMRegister src) {
2551   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2552   if (VM_Version::supports_evex()) {
2553     _tuple_type = EVEX_T1S;
2554     _input_size_in_bits = EVEX_32bit;
2555   }
2556   InstructionMark im(this);
2557   simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
2558   emit_int8(0x11);
2559   emit_operand(src, dst);
2560 }
2561 
2562 void Assembler::movswl(Register dst, Address src) { // movsxw
2563   InstructionMark im(this);
2564   prefix(src, dst);
2565   emit_int8(0x0F);
2566   emit_int8((unsigned char)0xBF);
2567   emit_operand(dst, src);
2568 }
2569 
2570 void Assembler::movswl(Register dst, Register src) { // movsxw
2571   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2572   emit_int8(0x0F);
2573   emit_int8((unsigned char)0xBF);
2574   emit_int8((unsigned char)(0xC0 | encode));
2575 }
2576 
2577 void Assembler::movw(Address dst, int imm16) {
2578   InstructionMark im(this);
2579 
  emit_int8(0x66); // operand-size override: 16-bit operand
2581   prefix(dst);
2582   emit_int8((unsigned char)0xC7);
2583   emit_operand(rax, dst, 2);
2584   emit_int16(imm16);
2585 }
2586 
2587 void Assembler::movw(Register dst, Address src) {
2588   InstructionMark im(this);
2589   emit_int8(0x66);
2590   prefix(src, dst);
2591   emit_int8((unsigned char)0x8B);
2592   emit_operand(dst, src);
2593 }
2594 
2595 void Assembler::movw(Address dst, Register src) {
2596   InstructionMark im(this);
2597   emit_int8(0x66);
2598   prefix(dst, src);
2599   emit_int8((unsigned char)0x89);
2600   emit_operand(src, dst);
2601 }
2602 
2603 void Assembler::movzbl(Register dst, Address src) { // movzxb
2604   InstructionMark im(this);
2605   prefix(src, dst);
2606   emit_int8(0x0F);
2607   emit_int8((unsigned char)0xB6);
2608   emit_operand(dst, src);
2609 }
2610 
2611 void Assembler::movzbl(Register dst, Register src) { // movzxb
2612   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2613   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2614   emit_int8(0x0F);
2615   emit_int8((unsigned char)0xB6);
2616   emit_int8(0xC0 | encode);
2617 }
2618 
2619 void Assembler::movzwl(Register dst, Address src) { // movzxw
2620   InstructionMark im(this);
2621   prefix(src, dst);
2622   emit_int8(0x0F);
2623   emit_int8((unsigned char)0xB7);
2624   emit_operand(dst, src);
2625 }
2626 
2627 void Assembler::movzwl(Register dst, Register src) { // movzxw
2628   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2629   emit_int8(0x0F);
2630   emit_int8((unsigned char)0xB7);
2631   emit_int8(0xC0 | encode);
2632 }
2633 
2634 void Assembler::mull(Address src) {
2635   InstructionMark im(this);
2636   prefix(src);
2637   emit_int8((unsigned char)0xF7);
2638   emit_operand(rsp, src);
2639 }
2640 
2641 void Assembler::mull(Register src) {
2642   int encode = prefix_and_encode(src->encoding());
2643   emit_int8((unsigned char)0xF7);
2644   emit_int8((unsigned char)(0xE0 | encode));
2645 }
2646 
2647 void Assembler::mulsd(XMMRegister dst, Address src) {
2648   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2649   if (VM_Version::supports_evex()) {
2650     _tuple_type = EVEX_T1S;
2651     _input_size_in_bits = EVEX_64bit;
2652     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2653   } else {
2654     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2655   }
2656 }
2657 
2658 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2659   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2660   if (VM_Version::supports_evex()) {
2661     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2662   } else {
2663     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2664   }
2665 }
2666 
2667 void Assembler::mulss(XMMRegister dst, Address src) {
2668   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2669   if (VM_Version::supports_evex()) {
2670     _tuple_type = EVEX_T1S;
2671     _input_size_in_bits = EVEX_32bit;
2672   }
2673   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2674 }
2675 
2676 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2677   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2678   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2679 }
2680 
2681 void Assembler::negl(Register dst) {
2682   int encode = prefix_and_encode(dst->encoding());
2683   emit_int8((unsigned char)0xF7);
2684   emit_int8((unsigned char)(0xD8 | encode));
2685 }
2686 
2687 void Assembler::nop(int i) {
2688 #ifdef ASSERT
2689   assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers, making it a
  // pain to disassemble code while debugging. If asserts are on, speed is
  // clearly not an issue, so simply use the single-byte traditional nop
  // to do alignment.
2694 
2695   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2696   return;
2697 
2698 #endif // ASSERT
2699 
2700   if (UseAddressNop && VM_Version::is_intel()) {
2701     //
    // Using multi-byte nops "0x0F 0x1F [address]" for Intel
2703     //  1: 0x90
2704     //  2: 0x66 0x90
2705     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2706     //  4: 0x0F 0x1F 0x40 0x00
2707     //  5: 0x0F 0x1F 0x44 0x00 0x00
2708     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2709     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2710     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2711     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2712     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2713     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2714 
    // The rest of the encoding is Intel-specific: don't use consecutive address nops
2716 
2717     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2718     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2719     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2720     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2721 
    while (i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
2724       i -= 15;
2725       emit_int8(0x66);   // size prefix
2726       emit_int8(0x66);   // size prefix
2727       emit_int8(0x66);   // size prefix
2728       addr_nop_8();
2729       emit_int8(0x66);   // size prefix
2730       emit_int8(0x66);   // size prefix
2731       emit_int8(0x66);   // size prefix
2732       emit_int8((unsigned char)0x90);
2733                          // nop
2734     }
2735     switch (i) {
2736       case 14:
2737         emit_int8(0x66); // size prefix
2738       case 13:
2739         emit_int8(0x66); // size prefix
2740       case 12:
2741         addr_nop_8();
2742         emit_int8(0x66); // size prefix
2743         emit_int8(0x66); // size prefix
2744         emit_int8(0x66); // size prefix
2745         emit_int8((unsigned char)0x90);
2746                          // nop
2747         break;
2748       case 11:
2749         emit_int8(0x66); // size prefix
2750       case 10:
2751         emit_int8(0x66); // size prefix
2752       case 9:
2753         emit_int8(0x66); // size prefix
2754       case 8:
2755         addr_nop_8();
2756         break;
2757       case 7:
2758         addr_nop_7();
2759         break;
2760       case 6:
2761         emit_int8(0x66); // size prefix
2762       case 5:
2763         addr_nop_5();
2764         break;
2765       case 4:
2766         addr_nop_4();
2767         break;
2768       case 3:
2769         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2770         emit_int8(0x66); // size prefix
2771       case 2:
2772         emit_int8(0x66); // size prefix
2773       case 1:
2774         emit_int8((unsigned char)0x90);
2775                          // nop
2776         break;
2777       default:
2778         assert(i == 0, " ");
2779     }
2780     return;
2781   }
2782   if (UseAddressNop && VM_Version::is_amd()) {
2783     //
    // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
2785     //  1: 0x90
2786     //  2: 0x66 0x90
2787     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2788     //  4: 0x0F 0x1F 0x40 0x00
2789     //  5: 0x0F 0x1F 0x44 0x00 0x00
2790     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2791     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2792     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2793     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2794     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2795     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2796 
    // The rest of the encoding is AMD-specific: use consecutive address nops
2798 
2799     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2800     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2801     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2802     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2803     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2804     //     Size prefixes (0x66) are added for larger sizes
2805 
    while (i >= 22) {
2807       i -= 11;
2808       emit_int8(0x66); // size prefix
2809       emit_int8(0x66); // size prefix
2810       emit_int8(0x66); // size prefix
2811       addr_nop_8();
2812     }
    // Generate the first nop for sizes between 12 and 21
2814     switch (i) {
2815       case 21:
2816         i -= 1;
2817         emit_int8(0x66); // size prefix
2818       case 20:
2819       case 19:
2820         i -= 1;
2821         emit_int8(0x66); // size prefix
2822       case 18:
2823       case 17:
2824         i -= 1;
2825         emit_int8(0x66); // size prefix
2826       case 16:
2827       case 15:
2828         i -= 8;
2829         addr_nop_8();
2830         break;
2831       case 14:
2832       case 13:
2833         i -= 7;
2834         addr_nop_7();
2835         break;
2836       case 12:
2837         i -= 6;
2838         emit_int8(0x66); // size prefix
2839         addr_nop_5();
2840         break;
2841       default:
2842         assert(i < 12, " ");
2843     }
2844 
    // Generate the second nop for sizes between 1 and 11
2846     switch (i) {
2847       case 11:
2848         emit_int8(0x66); // size prefix
2849       case 10:
2850         emit_int8(0x66); // size prefix
2851       case 9:
2852         emit_int8(0x66); // size prefix
2853       case 8:
2854         addr_nop_8();
2855         break;
2856       case 7:
2857         addr_nop_7();
2858         break;
2859       case 6:
2860         emit_int8(0x66); // size prefix
2861       case 5:
2862         addr_nop_5();
2863         break;
2864       case 4:
2865         addr_nop_4();
2866         break;
2867       case 3:
2868         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2869         emit_int8(0x66); // size prefix
2870       case 2:
2871         emit_int8(0x66); // size prefix
2872       case 1:
2873         emit_int8((unsigned char)0x90);
2874                          // nop
2875         break;
2876       default:
2877         assert(i == 0, " ");
2878     }
2879     return;
2880   }
2881 
2882   // Using nops with size prefixes "0x66 0x90".
2883   // From AMD Optimization Guide:
2884   //  1: 0x90
2885   //  2: 0x66 0x90
2886   //  3: 0x66 0x66 0x90
2887   //  4: 0x66 0x66 0x66 0x90
2888   //  5: 0x66 0x66 0x90 0x66 0x90
2889   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
2890   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2891   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2892   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2893   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2894   //
  while (i > 12) {
2896     i -= 4;
2897     emit_int8(0x66); // size prefix
2898     emit_int8(0x66);
2899     emit_int8(0x66);
2900     emit_int8((unsigned char)0x90);
2901                      // nop
2902   }
2903   // 1 - 12 nops
  if (i > 8) {
    if (i > 9) {
2906       i -= 1;
2907       emit_int8(0x66);
2908     }
2909     i -= 3;
2910     emit_int8(0x66);
2911     emit_int8(0x66);
2912     emit_int8((unsigned char)0x90);
2913   }
2914   // 1 - 8 nops
  if (i > 4) {
    if (i > 6) {
2917       i -= 1;
2918       emit_int8(0x66);
2919     }
2920     i -= 3;
2921     emit_int8(0x66);
2922     emit_int8(0x66);
2923     emit_int8((unsigned char)0x90);
2924   }
2925   switch (i) {
2926     case 4:
2927       emit_int8(0x66);
2928     case 3:
2929       emit_int8(0x66);
2930     case 2:
2931       emit_int8(0x66);
2932     case 1:
2933       emit_int8((unsigned char)0x90);
2934       break;
2935     default:
2936       assert(i == 0, " ");
2937   }
2938 }
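
// Worked example (illustrative), Intel path with UseAddressNop: nop(25)
// first emits one 15-byte block (three 0x66 prefixes, addr_nop_8, three
// 0x66 prefixes, 0x90), leaving i == 10; the switch then falls through
// case 10 and case 9 (two 0x66 prefixes) into case 8 (addr_nop_8),
// for exactly 25 bytes in total.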
2939 
2940 void Assembler::notl(Register dst) {
2941   int encode = prefix_and_encode(dst->encoding());
2942   emit_int8((unsigned char)0xF7);
2943   emit_int8((unsigned char)(0xD0 | encode));
2944 }
2945 
2946 void Assembler::orl(Address dst, int32_t imm32) {
2947   InstructionMark im(this);
2948   prefix(dst);
2949   emit_arith_operand(0x81, rcx, dst, imm32);
2950 }
2951 
2952 void Assembler::orl(Register dst, int32_t imm32) {
2953   prefix(dst);
2954   emit_arith(0x81, 0xC8, dst, imm32);
2955 }
2956 
2957 void Assembler::orl(Register dst, Address src) {
2958   InstructionMark im(this);
2959   prefix(src, dst);
2960   emit_int8(0x0B);
2961   emit_operand(dst, src);
2962 }
2963 
2964 void Assembler::orl(Register dst, Register src) {
2965   (void) prefix_and_encode(dst->encoding(), src->encoding());
2966   emit_arith(0x0B, 0xC0, dst, src);
2967 }
2968 
2969 void Assembler::orl(Address dst, Register src) {
2970   InstructionMark im(this);
2971   prefix(dst, src);
2972   emit_int8(0x09);
2973   emit_operand(src, dst);
2974 }
2975 
2976 void Assembler::packuswb(XMMRegister dst, Address src) {
2977   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2978   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2979   if (VM_Version::supports_evex()) {
2980     _tuple_type = EVEX_FV;
2981     _input_size_in_bits = EVEX_32bit;
2982   }
2983   emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
2984 }
2985 
2986 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2987   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2988   emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
2989 }
2990 
2991 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
2992   assert(UseAVX > 0, "some form of AVX must be enabled");
2993   emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
2994 }
2995 
2996 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
2997   _instruction_uses_vl = true;
2998   assert(VM_Version::supports_avx2(), "");
2999   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3000                                       VEX_OPCODE_0F_3A, /* rex_w */ true, vector_len);
3001   emit_int8(0x00);
3002   emit_int8(0xC0 | encode);
3003   emit_int8(imm8);
3004 }
3005 
3006 void Assembler::pause() {
3007   emit_int8((unsigned char)0xF3);
3008   emit_int8((unsigned char)0x90);
3009 }
3010 
3011 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3012   assert(VM_Version::supports_sse4_2(), "");
3013   InstructionMark im(this);
3014   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_3A,
3015               /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3016   emit_int8(0x61);
3017   emit_operand(dst, src);
3018   emit_int8(imm8);
3019 }
3020 
3021 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3022   assert(VM_Version::supports_sse4_2(), "");
3023   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3024                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3025   emit_int8(0x61);
3026   emit_int8((unsigned char)(0xC0 | encode));
3027   emit_int8(imm8);
3028 }
3029 
3030 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
3031   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x75, dst, src, VEX_SIMD_66,
                  /* no_mask_reg */ false, /* legacy_mode */ (VM_Version::supports_avx512dq() == false));
3034 }
3035 
3036 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3037   assert(UseAVX > 0, "some form of AVX must be enabled");
  emit_vex_arith(0x75, dst, nds, src, VEX_SIMD_66, vector_len,
                 /* no_mask_reg */ false, /* legacy_mode */ (VM_Version::supports_avx512dq() == false));
3040 }
3041 
3042 void Assembler::pmovmskb(Register dst, XMMRegister src) {
3043   assert(VM_Version::supports_sse2(), "");
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true, VEX_OPCODE_0F,
                                      /* rex_w */ false, AVX_128bit, /* legacy_mode */ (VM_Version::supports_avx512dq() == false));
3046   emit_int8((unsigned char)0xD7);
3047   emit_int8((unsigned char)(0xC0 | encode));
3048 }
3049 
3050 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
3051   assert(VM_Version::supports_avx2(), "");
3052   int vector_len = AVX_256bit;
3053   int encode = vex_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66,
3054                                      vector_len, VEX_OPCODE_0F, true, false);
3055   emit_int8((unsigned char)0xD7);
3056   emit_int8((unsigned char)(0xC0 | encode));
3057 }
3058 
3059 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3060   assert(VM_Version::supports_sse4_1(), "");
3061   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
3062                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3063   emit_int8(0x16);
3064   emit_int8((unsigned char)(0xC0 | encode));
3065   emit_int8(imm8);
3066 }
3067 
3068 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
3069   assert(VM_Version::supports_sse4_1(), "");
3070   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */  true,
3071                                       VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3072   emit_int8(0x16);
3073   emit_int8((unsigned char)(0xC0 | encode));
3074   emit_int8(imm8);
3075 }
3076 
3077 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
3078   assert(VM_Version::supports_sse2(), "");
3079   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
3080                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3081   emit_int8((unsigned char)0xC5);
3082   emit_int8((unsigned char)(0xC0 | encode));
3083   emit_int8(imm8);
3084 }
3085 
3086 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
3087   assert(VM_Version::supports_sse4_1(), "");
3088   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
3089                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3090   emit_int8(0x22);
3091   emit_int8((unsigned char)(0xC0 | encode));
3092   emit_int8(imm8);
3093 }
3094 
3095 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
3096   assert(VM_Version::supports_sse4_1(), "");
3097   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
3098                                       VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
3099   emit_int8(0x22);
3100   emit_int8((unsigned char)(0xC0 | encode));
3101   emit_int8(imm8);
3102 }
3103 
3104 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
3105   assert(VM_Version::supports_sse2(), "");
3106   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
3107                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3108   emit_int8((unsigned char)0xC4);
3109   emit_int8((unsigned char)(0xC0 | encode));
3110   emit_int8(imm8);
3111 }
3112 
3113 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
3114   assert(VM_Version::supports_sse4_1(), "");
3115   if (VM_Version::supports_evex()) {
3116     _tuple_type = EVEX_HVM;
3117   }
3118   InstructionMark im(this);
3119   simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
3120   emit_int8(0x30);
3121   emit_operand(dst, src);
3122 }
3123 
3124 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
3125   assert(VM_Version::supports_sse4_1(), "");
3126   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
3127   emit_int8(0x30);
3128   emit_int8((unsigned char)(0xC0 | encode));
3129 }
3130 
3131 void Assembler::vpmovzxbw(XMMRegister dst, Address src) {
3132   assert(VM_Version::supports_avx(), "");
3133   InstructionMark im(this);
3134   int vector_len = AVX_256bit;
3135   assert(dst != xnoreg, "sanity");
3136   int dst_enc = dst->encoding();
3137   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
3138   emit_int8(0x30);
3139   emit_operand(dst, src);
3140 }
3141 
3142 // generic
3143 void Assembler::pop(Register dst) {
3144   int encode = prefix_and_encode(dst->encoding());
3145   emit_int8(0x58 | encode);
3146 }
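
// Example of the short-form encoding above (illustrative): pop(rbx) is the
// single byte 0x5B (0x58 | 3); pop(r11) needs REX.B, so prefix_and_encode
// folds the encoding to 3 and the emitted bytes are 41 5B.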
3147 
3148 void Assembler::popcntl(Register dst, Address src) {
3149   assert(VM_Version::supports_popcnt(), "must support");
3150   InstructionMark im(this);
3151   emit_int8((unsigned char)0xF3);
3152   prefix(src, dst);
3153   emit_int8(0x0F);
3154   emit_int8((unsigned char)0xB8);
3155   emit_operand(dst, src);
3156 }
3157 
3158 void Assembler::popcntl(Register dst, Register src) {
3159   assert(VM_Version::supports_popcnt(), "must support");
3160   emit_int8((unsigned char)0xF3);
3161   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3162   emit_int8(0x0F);
3163   emit_int8((unsigned char)0xB8);
3164   emit_int8((unsigned char)(0xC0 | encode));
3165 }
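
// Illustrative bytes for the register form above: popcntl(rax, rbx) emits
// F3 0F B8 C3 -- mandatory F3 prefix, opcode 0F B8, ModRM 0xC3 (mod=11,
// reg=rax as destination, rm=rbx as source).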
3166 
3167 void Assembler::popf() {
3168   emit_int8((unsigned char)0x9D);
3169 }
3170 
3171 #ifndef _LP64 // no 32bit push/pop on amd64
3172 void Assembler::popl(Address dst) {
3173   // NOTE: this will adjust stack by 8byte on 64bits
3174   InstructionMark im(this);
3175   prefix(dst);
3176   emit_int8((unsigned char)0x8F);
3177   emit_operand(rax, dst);
3178 }
3179 #endif
3180 
3181 void Assembler::prefetch_prefix(Address src) {
3182   prefix(src);
3183   emit_int8(0x0F);
3184 }
3185 
3186 void Assembler::prefetchnta(Address src) {
3187   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3188   InstructionMark im(this);
3189   prefetch_prefix(src);
3190   emit_int8(0x18);
3191   emit_operand(rax, src); // 0, src
3192 }
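
// For all prefetch variants the register passed to emit_operand is only an
// encoding trick: it supplies the ModRM reg field (the /digit) selecting
// the hint -- 0F 18 /0 = NTA, /1 = T0, /2 = T1, /3 = T2. Illustrative:
// prefetchnta(Address(rsi, 0)) emits 0F 18 06.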
3193 
3194 void Assembler::prefetchr(Address src) {
3195   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3196   InstructionMark im(this);
3197   prefetch_prefix(src);
3198   emit_int8(0x0D);
3199   emit_operand(rax, src); // 0, src
3200 }
3201 
3202 void Assembler::prefetcht0(Address src) {
3203   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3204   InstructionMark im(this);
3205   prefetch_prefix(src);
3206   emit_int8(0x18);
3207   emit_operand(rcx, src); // 1, src
3208 }
3209 
3210 void Assembler::prefetcht1(Address src) {
3211   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3212   InstructionMark im(this);
3213   prefetch_prefix(src);
3214   emit_int8(0x18);
3215   emit_operand(rdx, src); // 2, src
3216 }
3217 
3218 void Assembler::prefetcht2(Address src) {
3219   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3220   InstructionMark im(this);
3221   prefetch_prefix(src);
3222   emit_int8(0x18);
3223   emit_operand(rbx, src); // 3, src
3224 }
3225 
3226 void Assembler::prefetchw(Address src) {
3227   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3228   InstructionMark im(this);
3229   prefetch_prefix(src);
3230   emit_int8(0x0D);
3231   emit_operand(rcx, src); // 1, src
3232 }
3233 
3234 void Assembler::prefix(Prefix p) {
3235   emit_int8(p);
3236 }
3237 
3238 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
3239   assert(VM_Version::supports_ssse3(), "");
3240   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
3241                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3242   emit_int8(0x00);
3243   emit_int8((unsigned char)(0xC0 | encode));
3244 }
3245 
3246 void Assembler::pshufb(XMMRegister dst, Address src) {
3247   assert(VM_Version::supports_ssse3(), "");
3248   if (VM_Version::supports_evex()) {
3249     _tuple_type = EVEX_FVM;
3250   }
3251   InstructionMark im(this);
3252   simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
3253               VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3254   emit_int8(0x00);
3255   emit_operand(dst, src);
3256 }
3257 
3258 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
3259   _instruction_uses_vl = true;
3260   assert(isByte(mode), "invalid value");
3261   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3262   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
3263   emit_int8(mode & 0xFF);
3264 }
3265 
3266 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
3267   _instruction_uses_vl = true;
3268   assert(isByte(mode), "invalid value");
3269   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3270   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3271   if (VM_Version::supports_evex()) {
3272     _tuple_type = EVEX_FV;
3273     _input_size_in_bits = EVEX_32bit;
3274   }
3275   InstructionMark im(this);
3276   simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false);
3277   emit_int8(0x70);
3278   emit_operand(dst, src);
3279   emit_int8(mode & 0xFF);
3280 }
3281 
3282 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
3283   assert(isByte(mode), "invalid value");
3284   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3285   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
3286   emit_int8(mode & 0xFF);
3287 }
3288 
3289 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
3290   assert(isByte(mode), "invalid value");
3291   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3292   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3293   if (VM_Version::supports_evex()) {
3294     _tuple_type = EVEX_FVM;
3295   }
3296   InstructionMark im(this);
3297   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false,
3298               VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3299   emit_int8(0x70);
3300   emit_operand(dst, src);
3301   emit_int8(mode & 0xFF);
3302 }
3303 
3304 void Assembler::psrldq(XMMRegister dst, int shift) {
3305   // Shift 128 bit value in dst XMMRegister right by shift number of bytes.
3306   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3307   // XMM3 is for /3 encoding: 66 0F 73 /3 ib
3308   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
3309                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3310   emit_int8(0x73);
3311   emit_int8((unsigned char)(0xC0 | encode));
3312   emit_int8(shift);
3313 }
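
// Worked example of the /digit trick above (illustrative): psrldq(xmm2, 8)
// assembles on the legacy path to 66 0F 73 DA 08, where ModRM 0xDA packs
// mod=11, reg=3 (the /3 digit carried by xmm3) and rm=2 (xmm2).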
3314 
3315 void Assembler::pslldq(XMMRegister dst, int shift) {
3316   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
3317   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3318   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
3319   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
3320                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
3321   emit_int8(0x73);
3322   emit_int8((unsigned char)(0xC0 | encode));
3323   emit_int8(shift);
3324 }
3325 
3326 void Assembler::ptest(XMMRegister dst, Address src) {
3327   assert(VM_Version::supports_sse4_1(), "");
3328   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3329   InstructionMark im(this);
3330   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3331               VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3332   emit_int8(0x17);
3333   emit_operand(dst, src);
3334 }
3335 
3336 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
3337   assert(VM_Version::supports_sse4_1(), "");
3338   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
3339                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
3340   emit_int8(0x17);
3341   emit_int8((unsigned char)(0xC0 | encode));
3342 }
3343 
3344 void Assembler::vptest(XMMRegister dst, Address src) {
3345   assert(VM_Version::supports_avx(), "");
3346   InstructionMark im(this);
3347   int vector_len = AVX_256bit;
3348   assert(dst != xnoreg, "sanity");
3349   int dst_enc = dst->encoding();
3350   // swap src<->dst for encoding
3351   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* rex_w */ false,
3352              vector_len, /* legacy_mode  */ true, /* no_mask_reg */ false);
3353   emit_int8(0x17);
3354   emit_operand(dst, src);
3355 }
3356 
3357 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
3358   assert(VM_Version::supports_avx(), "");
3359   int vector_len = AVX_256bit;
3360   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
3361   emit_int8(0x17);
3362   emit_int8((unsigned char)(0xC0 | encode));
3363 }
3364 
3365 void Assembler::punpcklbw(XMMRegister dst, Address src) {
3366   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3367   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3368   if (VM_Version::supports_evex()) {
3369     _tuple_type = EVEX_FVM;
3370   }
3371   emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
3372 }
3373 
3374 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
3375   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3376   emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
3377 }
3378 
3379 void Assembler::punpckldq(XMMRegister dst, Address src) {
3380   _instruction_uses_vl = true;
3381   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3382   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3383   if (VM_Version::supports_evex()) {
3384     _tuple_type = EVEX_FV;
3385     _input_size_in_bits = EVEX_32bit;
3386   }
3387   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3388 }
3389 
3390 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
3391   _instruction_uses_vl = true;
3392   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3393   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3394 }
3395 
3396 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
3397   _instruction_uses_vl = true;
3398   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3399   if (VM_Version::supports_evex()) {
3400     emit_simd_arith_q(0x6C, dst, src, VEX_SIMD_66);
3401   } else {
3402     emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
3403   }
3404 }
3405 
3406 void Assembler::push(int32_t imm32) {
3407   // in 64-bit mode this pushes 8 bytes onto the stack, but still
3408   // takes only a 32-bit immediate (sign-extended by the CPU)
3409   emit_int8(0x68);
3410   emit_int32(imm32);
3411 }
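
// Illustrative bytes: push(0x12345678) emits 68 78 56 34 12; in 64-bit
// mode the CPU sign-extends the immediate and decrements rsp by 8.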
3412 
3413 void Assembler::push(Register src) {
3414   int encode = prefix_and_encode(src->encoding());
3415 
3416   emit_int8(0x50 | encode);
3417 }
3418 
3419 void Assembler::pushf() {
3420   emit_int8((unsigned char)0x9C);
3421 }
3422 
3423 #ifndef _LP64 // no 32bit push/pop on amd64
3424 void Assembler::pushl(Address src) {
3425   // Note this will push 64bit on 64bit
3426   InstructionMark im(this);
3427   prefix(src);
3428   emit_int8((unsigned char)0xFF);
3429   emit_operand(rsi, src);
3430 }
3431 #endif
3432 
3433 void Assembler::rcll(Register dst, int imm8) {
3434   assert(isShiftCount(imm8), "illegal shift count");
3435   int encode = prefix_and_encode(dst->encoding());
3436   if (imm8 == 1) {
3437     emit_int8((unsigned char)0xD1);
3438     emit_int8((unsigned char)(0xD0 | encode));
3439   } else {
3440     emit_int8((unsigned char)0xC1);
3441     emit_int8((unsigned char)(0xD0 | encode));
3442     emit_int8(imm8);
3443   }
3444 }
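
// The shift-by-1 special case above saves the immediate byte. Illustrative:
// rcll(rax, 1) emits D1 D0, while rcll(rax, 3) emits C1 D0 03 -- both use
// ModRM 0xD0 (mod=11, reg=2 for the /2 = RCL digit, rm=rax).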
3445 
3446 void Assembler::rdtsc() {
3447   emit_int8((unsigned char)0x0F);
3448   emit_int8((unsigned char)0x31);
3449 }
3450 
3451 // copies rcx pointer-sized words from [rsi] to [rdi] (esi/edi on 32-bit)
3452 // generic
3453 void Assembler::rep_mov() {
3454   emit_int8((unsigned char)0xF3);
3455   // MOVSQ
3456   LP64_ONLY(prefix(REX_W));
3457   emit_int8((unsigned char)0xA5);
3458 }
3459 
3460 // stores the low byte of rax (al) into rcx consecutive bytes at [rdi]
3461 void Assembler::rep_stosb() {
3462   emit_int8((unsigned char)0xF3); // REP
3463   LP64_ONLY(prefix(REX_W));
3464   emit_int8((unsigned char)0xAA); // STOSB
3465 }
3466 
3467 // stores rax into rcx consecutive pointer-sized words at [rdi]
3468 // generic
3469 void Assembler::rep_stos() {
3470   emit_int8((unsigned char)0xF3); // REP
3471   LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
3472   emit_int8((unsigned char)0xAB);
3473 }
3474 
3475 // scans rcx pointer-sized words at [rdi] for the first occurrence of rax
3476 // generic
3477 void Assembler::repne_scan() {
3478   emit_int8((unsigned char)0xF2);
3479   // SCASQ
3480   LP64_ONLY(prefix(REX_W));
3481   emit_int8((unsigned char)0xAF);
3482 }
3483 
3484 #ifdef _LP64
3485 // scans rcx 4-byte words at [rdi] for the first occurrence of eax
3486 // generic
3487 void Assembler::repne_scanl() {
3488   emit_int8((unsigned char)0xF2);
3489   // SCASL
3490   emit_int8((unsigned char)0xAF);
3491 }
3492 #endif
3493 
3494 void Assembler::ret(int imm16) {
3495   if (imm16 == 0) {
3496     emit_int8((unsigned char)0xC3);
3497   } else {
3498     emit_int8((unsigned char)0xC2);
3499     emit_int16(imm16);
3500   }
3501 }
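
// Illustrative bytes: ret() is the single byte C3; ret(8), used when the
// callee pops its own argument space, emits C2 08 00 (RET imm16).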
3502 
3503 void Assembler::sahf() {
3504 #ifdef _LP64
3505   // Not supported in 64bit mode
3506   ShouldNotReachHere();
3507 #endif
3508   emit_int8((unsigned char)0x9E);
3509 }
3510 
3511 void Assembler::sarl(Register dst, int imm8) {
3512   int encode = prefix_and_encode(dst->encoding());
3513   assert(isShiftCount(imm8), "illegal shift count");
3514   if (imm8 == 1) {
3515     emit_int8((unsigned char)0xD1);
3516     emit_int8((unsigned char)(0xF8 | encode));
3517   } else {
3518     emit_int8((unsigned char)0xC1);
3519     emit_int8((unsigned char)(0xF8 | encode));
3520     emit_int8(imm8);
3521   }
3522 }
3523 
3524 void Assembler::sarl(Register dst) {
3525   int encode = prefix_and_encode(dst->encoding());
3526   emit_int8((unsigned char)0xD3);
3527   emit_int8((unsigned char)(0xF8 | encode));
3528 }
3529 
3530 void Assembler::sbbl(Address dst, int32_t imm32) {
3531   InstructionMark im(this);
3532   prefix(dst);
3533   emit_arith_operand(0x81, rbx, dst, imm32);
3534 }
3535 
3536 void Assembler::sbbl(Register dst, int32_t imm32) {
3537   prefix(dst);
3538   emit_arith(0x81, 0xD8, dst, imm32);
3539 }
3540 
3541 
3542 void Assembler::sbbl(Register dst, Address src) {
3543   InstructionMark im(this);
3544   prefix(src, dst);
3545   emit_int8(0x1B);
3546   emit_operand(dst, src);
3547 }
3548 
3549 void Assembler::sbbl(Register dst, Register src) {
3550   (void) prefix_and_encode(dst->encoding(), src->encoding());
3551   emit_arith(0x1B, 0xC0, dst, src);
3552 }
3553 
3554 void Assembler::setb(Condition cc, Register dst) {
3555   assert(0 <= cc && cc < 16, "illegal cc");
3556   int encode = prefix_and_encode(dst->encoding(), true);
3557   emit_int8(0x0F);
3558   emit_int8((unsigned char)(0x90 | cc));
3559   emit_int8((unsigned char)(0xC0 | encode));
3560 }
3561 
3562 void Assembler::shll(Register dst, int imm8) {
3563   assert(isShiftCount(imm8), "illegal shift count");
3564   int encode = prefix_and_encode(dst->encoding());
3565   if (imm8 == 1) {
3566     emit_int8((unsigned char)0xD1);
3567     emit_int8((unsigned char)(0xE0 | encode));
3568   } else {
3569     emit_int8((unsigned char)0xC1);
3570     emit_int8((unsigned char)(0xE0 | encode));
3571     emit_int8(imm8);
3572   }
3573 }
3574 
3575 void Assembler::shll(Register dst) {
3576   int encode = prefix_and_encode(dst->encoding());
3577   emit_int8((unsigned char)0xD3);
3578   emit_int8((unsigned char)(0xE0 | encode));
3579 }
3580 
3581 void Assembler::shrl(Register dst, int imm8) {
3582   assert(isShiftCount(imm8), "illegal shift count");
3583   int encode = prefix_and_encode(dst->encoding());
3584   emit_int8((unsigned char)0xC1);
3585   emit_int8((unsigned char)(0xE8 | encode));
3586   emit_int8(imm8);
3587 }
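
// Note: unlike shll/sarl above, shrl always uses the C1 /5 ib form, so
// shrl(rcx, 1) emits C1 E9 01 rather than the equivalent shorter D1 E9.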
3588 
3589 void Assembler::shrl(Register dst) {
3590   int encode = prefix_and_encode(dst->encoding());
3591   emit_int8((unsigned char)0xD3);
3592   emit_int8((unsigned char)(0xE8 | encode));
3593 }
3594 
3595 // copies a single 32-bit word from [esi] to [edi]
3596 void Assembler::smovl() {
3597   emit_int8((unsigned char)0xA5);
3598 }
3599 
3600 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
3601   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3602   if (VM_Version::supports_evex()) {
3603     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3604   } else {
3605     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3606   }
3607 }
3608 
3609 void Assembler::sqrtsd(XMMRegister dst, Address src) {
3610   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3611   if (VM_Version::supports_evex()) {
3612     _tuple_type = EVEX_T1S;
3613     _input_size_in_bits = EVEX_64bit;
3614     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3615   } else {
3616     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3617   }
3618 }
3619 
3620 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
3621   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3622   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
3623 }
3624 
3625 void Assembler::std() {
3626   emit_int8((unsigned char)0xFD);
3627 }
3628 
3629 void Assembler::sqrtss(XMMRegister dst, Address src) {
3630   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3631   if (VM_Version::supports_evex()) {
3632     _tuple_type = EVEX_T1S;
3633     _input_size_in_bits = EVEX_32bit;
3634   }
3635   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
3636 }
3637 
3638 void Assembler::stmxcsr( Address dst) {
3639   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3640   InstructionMark im(this);
3641   prefix(dst);
3642   emit_int8(0x0F);
3643   emit_int8((unsigned char)0xAE);
3644   emit_operand(as_Register(3), dst);
3645 }
3646 
3647 void Assembler::subl(Address dst, int32_t imm32) {
3648   InstructionMark im(this);
3649   prefix(dst);
3650   emit_arith_operand(0x81, rbp, dst, imm32);
3651 }
3652 
3653 void Assembler::subl(Address dst, Register src) {
3654   InstructionMark im(this);
3655   prefix(dst, src);
3656   emit_int8(0x29);
3657   emit_operand(src, dst);
3658 }
3659 
3660 void Assembler::subl(Register dst, int32_t imm32) {
3661   prefix(dst);
3662   emit_arith(0x81, 0xE8, dst, imm32);
3663 }
3664 
3665 // Force generation of a 4-byte immediate value even if it fits into 8 bits
3666 void Assembler::subl_imm32(Register dst, int32_t imm32) {
3667   prefix(dst);
3668   emit_arith_imm32(0x81, 0xE8, dst, imm32);
3669 }
3670 
3671 void Assembler::subl(Register dst, Address src) {
3672   InstructionMark im(this);
3673   prefix(src, dst);
3674   emit_int8(0x2B);
3675   emit_operand(dst, src);
3676 }
3677 
3678 void Assembler::subl(Register dst, Register src) {
3679   (void) prefix_and_encode(dst->encoding(), src->encoding());
3680   emit_arith(0x2B, 0xC0, dst, src);
3681 }
3682 
3683 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
3684   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3685   if (VM_Version::supports_evex()) {
3686     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
3687   } else {
3688     emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
3689   }
3690 }
3691 
3692 void Assembler::subsd(XMMRegister dst, Address src) {
3693   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3694   if (VM_Version::supports_evex()) {
3695     _tuple_type = EVEX_T1S;
3696     _input_size_in_bits = EVEX_64bit;
3697     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
3698   } else {
3699     emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
3700   }
3701 }
3704 
3705 void Assembler::subss(XMMRegister dst, XMMRegister src) {
3706   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3707   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3708 }
3709 
3710 void Assembler::subss(XMMRegister dst, Address src) {
3711   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3712   if (VM_Version::supports_evex()) {
3713     _tuple_type = EVEX_T1S;
3714     _input_size_in_bits = EVEX_32bit;
3715   }
3716   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3717 }
3718 
3719 void Assembler::testb(Register dst, int imm8) {
3720   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
3721   (void) prefix_and_encode(dst->encoding(), true);
3722   emit_arith_b(0xF6, 0xC0, dst, imm8);
3723 }
3724 
3725 void Assembler::testl(Register dst, int32_t imm32) {
3726   // not using emit_arith because test
3727   // doesn't support sign-extension of
3728   // 8bit operands
3729   int encode = dst->encoding();
3730   if (encode == 0) {
3731     emit_int8((unsigned char)0xA9);
3732   } else {
3733     encode = prefix_and_encode(encode);
3734     emit_int8((unsigned char)0xF7);
3735     emit_int8((unsigned char)(0xC0 | encode));
3736   }
3737   emit_int32(imm32);
3738 }
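
// The encode == 0 shortcut above matches only rax (raw encoding 0, tested
// before any REX prefixing), yielding the short A9 id form. Illustrative:
// testl(rax, 0x7F) emits A9 7F 00 00 00, while testl(rbx, 0x7F) emits the
// general F7 /0 form, F7 C3 7F 00 00 00.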
3739 
3740 void Assembler::testl(Register dst, Register src) {
3741   (void) prefix_and_encode(dst->encoding(), src->encoding());
3742   emit_arith(0x85, 0xC0, dst, src);
3743 }
3744 
3745 void Assembler::testl(Register dst, Address  src) {
3746   InstructionMark im(this);
3747   prefix(src, dst);
3748   emit_int8((unsigned char)0x85);
3749   emit_operand(dst, src);
3750 }
3751 
3752 void Assembler::tzcntl(Register dst, Register src) {
3753   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3754   emit_int8((unsigned char)0xF3);
3755   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3756   emit_int8(0x0F);
3757   emit_int8((unsigned char)0xBC);
3758   emit_int8((unsigned char)(0xC0 | encode));
3759 }
3760 
3761 void Assembler::tzcntq(Register dst, Register src) {
3762   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3763   emit_int8((unsigned char)0xF3);
3764   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3765   emit_int8(0x0F);
3766   emit_int8((unsigned char)0xBC);
3767   emit_int8((unsigned char)(0xC0 | encode));
3768 }
3769 
3770 void Assembler::ucomisd(XMMRegister dst, Address src) {
3771   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3772   if (VM_Version::supports_evex()) {
3773     _tuple_type = EVEX_T1S;
3774     _input_size_in_bits = EVEX_64bit;
3775     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
3776   } else {
3777     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3778   }
3779 }
3780 
3781 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
3782   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3783   if (VM_Version::supports_evex()) {
3784     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
3785   } else {
3786     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3787   }
3788 }
3789 
3790 void Assembler::ucomiss(XMMRegister dst, Address src) {
3791   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3792   if (VM_Version::supports_evex()) {
3793     _tuple_type = EVEX_T1S;
3794     _input_size_in_bits = EVEX_32bit;
3795   }
3796   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
3797 }
3798 
3799 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
3800   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3801   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
3802 }
3803 
3804 void Assembler::xabort(int8_t imm8) {
3805   emit_int8((unsigned char)0xC6);
3806   emit_int8((unsigned char)0xF8);
3807   emit_int8((unsigned char)(imm8 & 0xFF));
3808 }
3809 
3810 void Assembler::xaddl(Address dst, Register src) {
3811   InstructionMark im(this);
3812   prefix(dst, src);
3813   emit_int8(0x0F);
3814   emit_int8((unsigned char)0xC1);
3815   emit_operand(src, dst);
3816 }
3817 
3818 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
3819   InstructionMark im(this);
3820   relocate(rtype);
3821   if (abort.is_bound()) {
3822     address entry = target(abort);
3823     assert(entry != NULL, "abort entry NULL");
3824     intptr_t offset = entry - pc();
3825     emit_int8((unsigned char)0xC7);
3826     emit_int8((unsigned char)0xF8);
3827     emit_int32(offset - 6); // 2 opcode bytes + 4 displacement bytes
3828   } else {
3829     abort.add_patch_at(code(), locator());
3830     emit_int8((unsigned char)0xC7);
3831     emit_int8((unsigned char)0xF8);
3832     emit_int32(0);
3833   }
3834 }
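
// In the bound case above, pc() still points at the first byte of the
// XBEGIN being emitted, while its rel32 is defined relative to the end of
// the 6-byte instruction (C7 F8 + rel32) -- hence "offset - 6".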
3835 
3836 void Assembler::xchgl(Register dst, Address src) { // xchg
3837   InstructionMark im(this);
3838   prefix(src, dst);
3839   emit_int8((unsigned char)0x87);
3840   emit_operand(dst, src);
3841 }
3842 
3843 void Assembler::xchgl(Register dst, Register src) {
3844   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3845   emit_int8((unsigned char)0x87);
3846   emit_int8((unsigned char)(0xC0 | encode));
3847 }
3848 
3849 void Assembler::xend() {
3850   emit_int8((unsigned char)0x0F);
3851   emit_int8((unsigned char)0x01);
3852   emit_int8((unsigned char)0xD5);
3853 }
3854 
3855 void Assembler::xgetbv() {
3856   emit_int8(0x0F);
3857   emit_int8(0x01);
3858   emit_int8((unsigned char)0xD0);
3859 }
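
// Usage note: XGETBV reads the extended control register selected by ecx
// into edx:eax; callers typically zero rcx first so this returns XCR0 when
// probing OS support for saving AVX/AVX-512 state.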
3860 
3861 void Assembler::xorl(Register dst, int32_t imm32) {
3862   prefix(dst);
3863   emit_arith(0x81, 0xF0, dst, imm32);
3864 }
3865 
3866 void Assembler::xorl(Register dst, Address src) {
3867   InstructionMark im(this);
3868   prefix(src, dst);
3869   emit_int8(0x33);
3870   emit_operand(dst, src);
3871 }
3872 
3873 void Assembler::xorl(Register dst, Register src) {
3874   (void) prefix_and_encode(dst->encoding(), src->encoding());
3875   emit_arith(0x33, 0xC0, dst, src);
3876 }
3877 
3878 
3879 // AVX 3-operand scalar floating-point arithmetic instructions
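//
// Semantics reminder for the 3-operand forms below (e.g. vaddsd): the low
// scalar of dst = low scalar of nds OP low scalar of src, the remaining
// low-128 bits of dst are copied from nds, and bits above 127 are zeroed.
// Unlike the 2-operand SSE forms, dst is never an implicit source.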
3880 
3881 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
3882   assert(VM_Version::supports_avx(), "");
3883   if (VM_Version::supports_evex()) {
3884     _tuple_type = EVEX_T1S;
3885     _input_size_in_bits = EVEX_64bit;
3886     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3887   } else {
3888     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3889   }
3890 }
3891 
3892 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3893   assert(VM_Version::supports_avx(), "");
3894   if (VM_Version::supports_evex()) {
3895     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3896   } else {
3897     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3898   }
3899 }
3900 
3901 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
3902   assert(VM_Version::supports_avx(), "");
3903   if (VM_Version::supports_evex()) {
3904     _tuple_type = EVEX_T1S;
3905     _input_size_in_bits = EVEX_32bit;
3906   }
3907   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3908 }
3909 
3910 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3911   assert(VM_Version::supports_avx(), "");
3912   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3913 }
3914 
3915 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
3916   assert(VM_Version::supports_avx(), "");
3917   if (VM_Version::supports_evex()) {
3918     _tuple_type = EVEX_T1S;
3919     _input_size_in_bits = EVEX_64bit;
3920     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3921   } else {
3922     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3923   }
3924 }
3925 
3926 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3927   assert(VM_Version::supports_avx(), "");
3928   if (VM_Version::supports_evex()) {
3929     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3930   } else {
3931     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3932   }
3933 }
3934 
3935 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
3936   assert(VM_Version::supports_avx(), "");
3937   if (VM_Version::supports_evex()) {
3938     _tuple_type = EVEX_T1S;
3939     _input_size_in_bits = EVEX_32bit;
3940   }
3941   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3942 }
3943 
3944 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3945   assert(VM_Version::supports_avx(), "");
3946   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3947 }
3948 
3949 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
3950   assert(VM_Version::supports_avx(), "");
3951   if (VM_Version::supports_evex()) {
3952     _tuple_type = EVEX_T1S;
3953     _input_size_in_bits = EVEX_64bit;
3954     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3955   } else {
3956     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3957   }
3958 }
3959 
3960 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3961   assert(VM_Version::supports_avx(), "");
3962   if (VM_Version::supports_evex()) {
3963     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3964   } else {
3965     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3966   }
3967 }
3968 
3969 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
3970   assert(VM_Version::supports_avx(), "");
3971   if (VM_Version::supports_evex()) {
3972     _tuple_type = EVEX_T1S;
3973     _input_size_in_bits = EVEX_32bit;
3974   }
3975   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3976 }
3977 
3978 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3979   assert(VM_Version::supports_avx(), "");
3980   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3981 }
3982 
3983 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
3984   assert(VM_Version::supports_avx(), "");
3985   if (VM_Version::supports_evex()) {
3986     _tuple_type = EVEX_T1S;
3987     _input_size_in_bits = EVEX_64bit;
3988     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3989   } else {
3990     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3991   }
3992 }
3993 
3994 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3995   assert(VM_Version::supports_avx(), "");
3996   if (VM_Version::supports_evex()) {
3997     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3998   } else {
3999     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
4000   }
4001 }
4002 
4003 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
4004   assert(VM_Version::supports_avx(), "");
4005   if (VM_Version::supports_evex()) {
4006     _tuple_type = EVEX_T1S;
4007     _input_size_in_bits = EVEX_32bit;
4008   }
4009   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
4010 }
4011 
4012 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4013   assert(VM_Version::supports_avx(), "");
4014   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
4015 }
4016 
4017 //====================VECTOR ARITHMETIC=====================================
4018 
4019 // Floating-point vector arithmetic
4020 
4021 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
4022   _instruction_uses_vl = true;
4023   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4024   if (VM_Version::supports_evex()) {
4025     emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66);
4026   } else {
4027     emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
4028   }
4029 }
4030 
4031 void Assembler::addps(XMMRegister dst, XMMRegister src) {
4032   _instruction_uses_vl = true;
4033   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4034   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
4035 }
4036 
4037 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4038   _instruction_uses_vl = true;
4039   assert(VM_Version::supports_avx(), "");
4040   if (VM_Version::supports_evex()) {
4041     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4042   } else {
4043     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4044   }
4045 }
4046 
4047 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4048   _instruction_uses_vl = true;
4049   assert(VM_Version::supports_avx(), "");
4050   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
4051 }
4052 
4053 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4054   _instruction_uses_vl = true;
4055   assert(VM_Version::supports_avx(), "");
4056   if (VM_Version::supports_evex()) {
4057     _tuple_type = EVEX_FV;
4058     _input_size_in_bits = EVEX_64bit;
4059     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4060   } else {
4061     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
4062   }
4063 }
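
// The _tuple_type / _input_size_in_bits pair recorded above feeds EVEX
// disp8*N compression: for a full-vector (FV) tuple with 64-bit elements,
// a one-byte displacement is scaled by the vector width in bytes, so e.g.
// a 512-bit vaddpd from Address(rax, 64) can encode its offset as disp8 = 1.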
4064 
4065 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4066   _instruction_uses_vl = true;
4067   assert(VM_Version::supports_avx(), "");
4068   if (VM_Version::supports_evex()) {
4069     _tuple_type = EVEX_FV;
4070     _input_size_in_bits = EVEX_32bit;
4071   }
4072   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
4073 }
4074 
4075 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
4076   _instruction_uses_vl = true;
4077   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4078   if (VM_Version::supports_evex()) {
4079     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66);
4080   } else {
4081     emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
4082   }
4083 }
4084 
4085 void Assembler::subps(XMMRegister dst, XMMRegister src) {
4086   _instruction_uses_vl = true;
4087   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4088   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
4089 }
4090 
4091 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4092   _instruction_uses_vl = true;
4093   assert(VM_Version::supports_avx(), "");
4094   if (VM_Version::supports_evex()) {
4095     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4096   } else {
4097     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4098   }
4099 }
4100 
4101 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4102   _instruction_uses_vl = true;
4103   assert(VM_Version::supports_avx(), "");
4104   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
4105 }
4106 
4107 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4108   _instruction_uses_vl = true;
4109   assert(VM_Version::supports_avx(), "");
4110   if (VM_Version::supports_evex()) {
4111     _tuple_type = EVEX_FV;
4112     _input_size_in_bits = EVEX_64bit;
4113     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4114   } else {
4115     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
4116   }
4117 }
4118 
4119 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4120   _instruction_uses_vl = true;
4121   assert(VM_Version::supports_avx(), "");
4122   if (VM_Version::supports_evex()) {
4123     _tuple_type = EVEX_FV;
4124     _input_size_in_bits = EVEX_32bit;
4125   }
4126   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
4127 }
4128 
4129 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
4130   _instruction_uses_vl = true;
4131   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4132   if (VM_Version::supports_evex()) {
4133     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
4134   } else {
4135     emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
4136   }
4137 }
4138 
4139 void Assembler::mulpd(XMMRegister dst, Address src) {
4140   _instruction_uses_vl = true;
4141   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4142   if (VM_Version::supports_evex()) {
4143     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
4144   } else {
4145     emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
4146   }
4147 }
4148 
4149 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
4150   _instruction_uses_vl = true;
4151   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4152   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
4153 }
4154 
4155 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4156   _instruction_uses_vl = true;
4157   assert(VM_Version::supports_avx(), "");
4158   if (VM_Version::supports_evex()) {
4159     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4160   } else {
4161     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4162   }
4163 }
4164 
4165 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4166   _instruction_uses_vl = true;
4167   assert(VM_Version::supports_avx(), "");
4168   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
4169 }
4170 
4171 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4172   _instruction_uses_vl = true;
4173   assert(VM_Version::supports_avx(), "");
4174   if (VM_Version::supports_evex()) {
4175     _tuple_type = EVEX_FV;
4176     _input_size_in_bits = EVEX_64bit;
4177     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4178   } else {
4179     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4180   }
4181 }
4182 
4183 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4184   _instruction_uses_vl = true;
4185   assert(VM_Version::supports_avx(), "");
4186   if (VM_Version::supports_evex()) {
4187     _tuple_type = EVEX_FV;
4188     _input_size_in_bits = EVEX_32bit;
4189   }
4190   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
4191 }
4192 
4193 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
4194   _instruction_uses_vl = true;
4195   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4196   if (VM_Version::supports_evex()) {
4197     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66);
4198   } else {
4199     emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
4200   }
4201 }
4202 
4203 void Assembler::divps(XMMRegister dst, XMMRegister src) {
4204   _instruction_uses_vl = true;
4205   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4206   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
4207 }
4208 
4209 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4210   _instruction_uses_vl = true;
4211   assert(VM_Version::supports_avx(), "");
4212   if (VM_Version::supports_evex()) {
4213     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4214   } else {
4215     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4216   }
4217 }
4218 
4219 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4220   _instruction_uses_vl = true;
4221   assert(VM_Version::supports_avx(), "");
4222   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
4223 }
4224 
4225 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4226   _instruction_uses_vl = true;
4227   assert(VM_Version::supports_avx(), "");
4228   if (VM_Version::supports_evex()) {
4229     _tuple_type = EVEX_FV;
4230     _input_size_in_bits = EVEX_64bit;
4231     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4232   } else {
4233     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4234   }
4235 }
4236 
4237 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4238   _instruction_uses_vl = true;
4239   assert(VM_Version::supports_avx(), "");
4240   if (VM_Version::supports_evex()) {
4241     _tuple_type = EVEX_FV;
4242     _input_size_in_bits = EVEX_32bit;
4243   }
4244   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
4245 }
4246 
4247 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
4248   _instruction_uses_vl = true;
4249   assert(VM_Version::supports_avx(), "");
4250   if (VM_Version::supports_evex()) {
4251     emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4252   } else {
4253     emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4254   }
4255 }
4256 
4257 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
4258   _instruction_uses_vl = true;
4259   assert(VM_Version::supports_avx(), "");
4260   if (VM_Version::supports_evex()) {
4261     _tuple_type = EVEX_FV;
4262     _input_size_in_bits = EVEX_64bit;
4263     emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4264   } else {
4265     emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
4266   }
4267 }
4268 
4269 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
4270   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4271   if (VM_Version::supports_avx512dq()) {
4272     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
4273   } else {
4274     emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4275   }
4276 }
4277 
4278 void Assembler::andps(XMMRegister dst, XMMRegister src) {
4279   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4280   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4281 }
4282 
4283 void Assembler::andps(XMMRegister dst, Address src) {
4284   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4285   if (VM_Version::supports_evex()) {
4286     _tuple_type = EVEX_FV;
4287     _input_size_in_bits = EVEX_32bit;
4288   }
4289   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4290 }
4291 
4292 void Assembler::andpd(XMMRegister dst, Address src) {
4293   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4294   if (VM_Version::supports_avx512dq()) {
4295     _tuple_type = EVEX_FV;
4296     _input_size_in_bits = EVEX_64bit;
4297     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
4298   } else {
4299     emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4300   }
4301 }
4302 
4303 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4304   assert(VM_Version::supports_avx(), "");
4305   if (VM_Version::supports_avx512dq()) {
4306     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
4307   } else {
4308     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4309   }
4310 }
4311 
4312 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4313   assert(VM_Version::supports_avx(), "");
4314   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false,  /* legacy_mode */ _legacy_mode_dq);
4315 }
4316 
4317 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4318   assert(VM_Version::supports_avx(), "");
4319   if (VM_Version::supports_avx512dq()) {
4320     _tuple_type = EVEX_FV;
4321     _input_size_in_bits = EVEX_64bit;
4322     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
4323   } else {
4324     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4325   }
4326 }
4327 
4328 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4329   assert(VM_Version::supports_avx(), "");
4330   if (VM_Version::supports_evex()) {
4331     _tuple_type = EVEX_FV;
4332     _input_size_in_bits = EVEX_32bit;
4333   }
4334   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4335 }
4336 
4337 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
4338   _instruction_uses_vl = true;
4339   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4340   if (VM_Version::supports_evex()) {
4341     emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66);
4342   } else {
4343     emit_simd_arith(0x15, dst, src, VEX_SIMD_66);
4344   }
4345 }
4346 
4347 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
4348   _instruction_uses_vl = true;
4349   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4350   if (VM_Version::supports_evex()) {
4351     emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66);
4352   } else {
4353     emit_simd_arith(0x14, dst, src, VEX_SIMD_66);
4354   }
4355 }
4356 
4357 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
4358   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4359   if (VM_Version::supports_avx512dq()) {
4360     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
4361   } else {
4362     emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4363   }
4364 }
4365 
4366 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
4367   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4368   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4369 }
4370 
4371 void Assembler::xorpd(XMMRegister dst, Address src) {
4372   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4373   if (VM_Version::supports_avx512dq()) {
4374     _tuple_type = EVEX_FV;
4375     _input_size_in_bits = EVEX_64bit;
4376     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
4377   } else {
4378     emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
4379   }
4380 }
4381 
4382 void Assembler::xorps(XMMRegister dst, Address src) {
4383   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4384   if (VM_Version::supports_evex()) {
4385     _tuple_type = EVEX_FV;
4386     _input_size_in_bits = EVEX_32bit;
4387   }
4388   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4389 }
4390 
4391 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4392   assert(VM_Version::supports_avx(), "");
4393   if (VM_Version::supports_avx512dq()) {
4394     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
4395   } else {
4396     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4397   }
4398 }
4399 
4400 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4401   assert(VM_Version::supports_avx(), "");
4402   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4403 }
4404 
4405 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4406   assert(VM_Version::supports_avx(), "");
4407   if (VM_Version::supports_avx512dq()) {
4408     _tuple_type = EVEX_FV;
4409     _input_size_in_bits = EVEX_64bit;
4410     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
4411   } else {
4412     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
4413   }
4414 }
4415 
4416 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4417   assert(VM_Version::supports_avx(), "");
4418   if (VM_Version::supports_evex()) {
4419     _tuple_type = EVEX_FV;
4420     _input_size_in_bits = EVEX_32bit;
4421   }
4422   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
4423 }
4424 
4425 // Integer vector arithmetic
4426 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4427   assert((VM_Version::supports_avx() && (vector_len == 0)) ||
4428          VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
4429   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
4430   emit_int8(0x01);
4431   emit_int8((unsigned char)(0xC0 | encode));
4432 }
4433 
4434 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4435   assert((VM_Version::supports_avx() && (vector_len == 0)) ||
4436          VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
4437   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
4438   emit_int8(0x02);
4439   emit_int8((unsigned char)(0xC0 | encode));
4440 }
4441 
4442 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
4443   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4444   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4445 }
4446 
4447 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
4448   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4449   emit_simd_arith(0xFD, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4450 }
4451 
4452 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
4453   _instruction_uses_vl = true;
4454   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4455   emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
4456 }
4457 
4458 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
4459   _instruction_uses_vl = true;
4460   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4461   if (VM_Version::supports_evex()) {
4462     emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66);
4463   } else {
4464     emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
4465   }
4466 }
4467 
4468 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
4469   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4470   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
4471                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
4472   emit_int8(0x01);
4473   emit_int8((unsigned char)(0xC0 | encode));
4474 }
4475 
4476 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
4477   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4478   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
4479                                       VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
4480   emit_int8(0x02);
4481   emit_int8((unsigned char)(0xC0 | encode));
4482 }
4483 
4484 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4485   assert(UseAVX > 0, "requires some form of AVX");
4486   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4487 }
4488 
4489 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4490   assert(UseAVX > 0, "requires some form of AVX");
4491   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4492 }
4493 
4494 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4495   _instruction_uses_vl = true;
4496   assert(UseAVX > 0, "requires some form of AVX");
4497   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4498 }
4499 
4500 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4501   _instruction_uses_vl = true;
4502   assert(UseAVX > 0, "requires some form of AVX");
4503   if (VM_Version::supports_evex()) {
4504     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4505   } else {
4506     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4507   }
4508 }
4509 
4510 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4511   assert(UseAVX > 0, "requires some form of AVX");
4512   if (VM_Version::supports_evex()) {
4513     _tuple_type = EVEX_FVM;
4514   }
4515   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4516 }
4517 
4518 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4519   assert(UseAVX > 0, "requires some form of AVX");
4520   if (VM_Version::supports_evex()) {
4521     _tuple_type = EVEX_FVM;
4522   }
4523   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4524 }
4525 
4526 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4527   _instruction_uses_vl = true;
4528   assert(UseAVX > 0, "requires some form of AVX");
4529   if (VM_Version::supports_evex()) {
4530     _tuple_type = EVEX_FV;
4531     _input_size_in_bits = EVEX_32bit;
4532   }
4533   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4534 }
4535 
4536 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4537   _instruction_uses_vl = true;
4538   assert(UseAVX > 0, "requires some form of AVX");
4539   if (VM_Version::supports_evex()) {
4540     _tuple_type = EVEX_FV;
4541     _input_size_in_bits = EVEX_64bit;
4542     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4543   } else {
4544     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4545   }
4546 }
4547 
4548 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
4549   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4550   emit_simd_arith(0xF8, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4551 }
4552 
4553 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
4554   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4555   emit_simd_arith(0xF9, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4556 }
4557 
4558 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
4559   _instruction_uses_vl = true;
4560   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4561   emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
4562 }
4563 
4564 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
4565   _instruction_uses_vl = true;
4566   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4567   if (VM_Version::supports_evex()) {
4568     emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66);
4569   } else {
4570     emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
4571   }
4572 }
4573 
4574 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4575   assert(UseAVX > 0, "requires some form of AVX");
4576   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4577 }
4578 
4579 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4580   assert(UseAVX > 0, "requires some form of AVX");
4581   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4582 }
4583 
4584 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4585   _instruction_uses_vl = true;
4586   assert(UseAVX > 0, "requires some form of AVX");
4587   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4588 }
4589 
4590 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4591   _instruction_uses_vl = true;
4592   assert(UseAVX > 0, "requires some form of AVX");
4593   if (VM_Version::supports_evex()) {
4594     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4595   } else {
4596     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4597   }
4598 }
4599 
4600 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4601   assert(UseAVX > 0, "requires some form of AVX");
4602   if (VM_Version::supports_evex()) {
4603     _tuple_type = EVEX_FVM;
4604   }
4605   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4606 }
4607 
4608 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4609   assert(UseAVX > 0, "requires some form of AVX");
4610   if (VM_Version::supports_evex()) {
4611     _tuple_type = EVEX_FVM;
4612   }
4613   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4614 }
4615 
4616 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4617   _instruction_uses_vl = true;
4618   assert(UseAVX > 0, "requires some form of AVX");
4619   if (VM_Version::supports_evex()) {
4620     _tuple_type = EVEX_FV;
4621     _input_size_in_bits = EVEX_32bit;
4622   }
4623   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4624 }
4625 
4626 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4627   _instruction_uses_vl = true;
4628   assert(UseAVX > 0, "requires some form of AVX");
4629   if (VM_Version::supports_evex()) {
4630     _tuple_type = EVEX_FV;
4631     _input_size_in_bits = EVEX_64bit;
4632     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4633   } else {
4634     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4635   }
4636 }
4637 
4638 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
4639   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4640   emit_simd_arith(0xD5, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4641 }
4642 
4643 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
4644   _instruction_uses_vl = true;
4645   assert(VM_Version::supports_sse4_1(), "");
4646   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66,
4647                                       /* no_mask_reg */ false, VEX_OPCODE_0F_38);
4648   emit_int8(0x40);
4649   emit_int8((unsigned char)(0xC0 | encode));
4650 }
4651 
4652 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4653   assert(UseAVX > 0, "requires some form of AVX");
4654   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4655 }
4656 
4657 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4658   _instruction_uses_vl = true;
4659   assert(UseAVX > 0, "requires some form of AVX");
4660   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
4661   emit_int8(0x40);
4662   emit_int8((unsigned char)(0xC0 | encode));
4663 }
4664 
4665 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
4667   int src_enc = src->encoding();
4668   int dst_enc = dst->encoding();
4669   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4670   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38,
4671                                      /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false);
4672   emit_int8(0x40);
4673   emit_int8((unsigned char)(0xC0 | encode));
4674 }
4675 
4676 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4677   assert(UseAVX > 0, "requires some form of AVX");
4678   if (VM_Version::supports_evex()) {
4679     _tuple_type = EVEX_FVM;
4680   }
4681   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4682 }
4683 
4684 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4685   _instruction_uses_vl = true;
4686   assert(UseAVX > 0, "requires some form of AVX");
4687   if (VM_Version::supports_evex()) {
4688     _tuple_type = EVEX_FV;
4689     _input_size_in_bits = EVEX_32bit;
4690   }
4691   InstructionMark im(this);
4692   int dst_enc = dst->encoding();
4693   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4694   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
4695              VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
4696   emit_int8(0x40);
4697   emit_operand(dst, src);
4698 }
4699 
4700 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
4702   if (VM_Version::supports_evex()) {
4703     _tuple_type = EVEX_FV;
4704     _input_size_in_bits = EVEX_64bit;
4705   }
4706   InstructionMark im(this);
4707   int dst_enc = dst->encoding();
4708   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4709   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
4710              VEX_OPCODE_0F_38, /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq);
4711   emit_int8(0x40);
4712   emit_operand(dst, src);
4713 }
4714 
4715 // Shift packed integers left by specified number of bits.
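// In the shift-by-immediate forms below there is no register source: the opcode
// extension (/2, /4 or /6) travels in the reg field of the ModRM byte, which is why
// a fixed xmm register (xmm2/xmm4/xmm6) is passed to the prefix helpers in that slot.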
4716 void Assembler::psllw(XMMRegister dst, int shift) {
4717   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4718   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4719   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F,
4720                                       /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
4721   emit_int8(0x71);
4722   emit_int8((unsigned char)(0xC0 | encode));
4723   emit_int8(shift & 0xFF);
4724 }
4725 
4726 void Assembler::pslld(XMMRegister dst, int shift) {
4727   _instruction_uses_vl = true;
4728   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4729   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4730   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
4731   emit_int8(0x72);
4732   emit_int8((unsigned char)(0xC0 | encode));
4733   emit_int8(shift & 0xFF);
4734 }
4735 
4736 void Assembler::psllq(XMMRegister dst, int shift) {
4737   _instruction_uses_vl = true;
4738   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4739   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4740   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ true);
4741   emit_int8(0x73);
4742   emit_int8((unsigned char)(0xC0 | encode));
4743   emit_int8(shift & 0xFF);
4744 }
4745 
4746 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
4747   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4748   emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4749 }
4750 
4751 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
4752   _instruction_uses_vl = true;
4753   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4754   emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
4755 }
4756 
4757 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
4758   _instruction_uses_vl = true;
4759   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4760   if (VM_Version::supports_evex()) {
4761     emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66);
4762   } else {
4763     emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
4764   }
4765 }
4766 
4767 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4768   assert(UseAVX > 0, "requires some form of AVX");
4769   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4770   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4771   emit_int8(shift & 0xFF);
4772 }
4773 
4774 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4775   _instruction_uses_vl = true;
4776   assert(UseAVX > 0, "requires some form of AVX");
4777   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4778   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len);
4779   emit_int8(shift & 0xFF);
4780 }
4781 
4782 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4783   _instruction_uses_vl = true;
4784   assert(UseAVX > 0, "requires some form of AVX");
4785   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4786   if (VM_Version::supports_evex()) {
4787     emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4788   } else {
4789     emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4790   }
4791   emit_int8(shift & 0xFF);
4792 }
4793 
4794 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4795   assert(UseAVX > 0, "requires some form of AVX");
4796   emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4797 }
4798 
4799 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4800   _instruction_uses_vl = true;
4801   assert(UseAVX > 0, "requires some form of AVX");
4802   emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len);
4803 }
4804 
4805 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4806   _instruction_uses_vl = true;
4807   assert(UseAVX > 0, "requires some form of AVX");
4808   if (VM_Version::supports_evex()) {
4809     emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4810   } else {
4811     emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4812   }
4813 }
4814 
4815 // Shift packed integers logically right by specified number of bits.
4816 void Assembler::psrlw(XMMRegister dst, int shift) {
4817   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4818   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4819   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
4820                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
4821   emit_int8(0x71);
4822   emit_int8((unsigned char)(0xC0 | encode));
4823   emit_int8(shift & 0xFF);
4824 }
4825 
4826 void Assembler::psrld(XMMRegister dst, int shift) {
4827   _instruction_uses_vl = true;
4828   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4829   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4830   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
4831   emit_int8(0x72);
4832   emit_int8((unsigned char)(0xC0 | encode));
4833   emit_int8(shift & 0xFF);
4834 }
4835 
4836 void Assembler::psrlq(XMMRegister dst, int shift) {
4837   _instruction_uses_vl = true;
  // Do not confuse with the SSE2 psrldq instruction, which
  // shifts the 128-bit value in an xmm register by a number of bytes.
4840   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4841   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4842   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
4843                                       VEX_OPCODE_0F, /* rex_w */ VM_Version::supports_evex());
4844   emit_int8(0x73);
4845   emit_int8((unsigned char)(0xC0 | encode));
4846   emit_int8(shift & 0xFF);
4847 }
4848 
4849 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
4850   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4851   emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4852 }
4853 
4854 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
4855   _instruction_uses_vl = true;
4856   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4857   emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
4858 }
4859 
4860 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
4861   _instruction_uses_vl = true;
4862   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4863   if (VM_Version::supports_evex()) {
4864     emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
4865   } else {
4866     emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
4867   }
4868 }
4869 
4870 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4871   assert(UseAVX > 0, "requires some form of AVX");
4872   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4873   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4874   emit_int8(shift & 0xFF);
4875 }
4876 
4877 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4878   _instruction_uses_vl = true;
4879   assert(UseAVX > 0, "requires some form of AVX");
4880   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4881   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
4882   emit_int8(shift & 0xFF);
4883 }
4884 
4885 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4886   _instruction_uses_vl = true;
4887   assert(UseAVX > 0, "requires some form of AVX");
4888   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4889   if (VM_Version::supports_evex()) {
4890     emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4891   } else {
4892     emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4893   }
4894   emit_int8(shift & 0xFF);
4895 }
4896 
4897 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4898   assert(UseAVX > 0, "requires some form of AVX");
4899   emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4900 }
4901 
4902 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4903   _instruction_uses_vl = true;
4904   assert(UseAVX > 0, "requires some form of AVX");
4905   emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
4906 }
4907 
4908 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4909   _instruction_uses_vl = true;
4910   assert(UseAVX > 0, "requires some form of AVX");
4911   if (VM_Version::supports_evex()) {
4912     emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4913   } else {
4914     emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4915   }
4916 }
4917 
4918 // Shift packed integers arithmetically right by specified number of bits.
4919 void Assembler::psraw(XMMRegister dst, int shift) {
4920   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4921   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4922   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
4923                                       VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
4924   emit_int8(0x71);
4925   emit_int8((unsigned char)(0xC0 | encode));
4926   emit_int8(shift & 0xFF);
4927 }
4928 
4929 void Assembler::psrad(XMMRegister dst, int shift) {
4930   _instruction_uses_vl = true;
4931   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4932   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4933   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
4934   emit_int8(0x72);
4935   emit_int8((unsigned char)(0xC0 | encode));
4936   emit_int8(shift & 0xFF);
4937 }
4938 
4939 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
4940   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4941   emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4942 }
4943 
4944 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
4945   _instruction_uses_vl = true;
4946   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4947   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
4948 }
4949 
4950 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4951   assert(UseAVX > 0, "requires some form of AVX");
4952   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4953   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4954   emit_int8(shift & 0xFF);
4955 }
4956 
4957 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4958   _instruction_uses_vl = true;
4959   assert(UseAVX > 0, "requires some form of AVX");
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4961   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
4962   emit_int8(shift & 0xFF);
4963 }
4964 
4965 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4966   assert(UseAVX > 0, "requires some form of AVX");
4967   emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
4968 }
4969 
4970 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4971   _instruction_uses_vl = true;
4972   assert(UseAVX > 0, "requires some form of AVX");
4973   emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len);
4974 }
4975 
4976 
// Logical operations on packed integers
4978 void Assembler::pand(XMMRegister dst, XMMRegister src) {
4979   _instruction_uses_vl = true;
4980   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4981   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
4982 }
4983 
4984 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4985   _instruction_uses_vl = true;
4986   assert(UseAVX > 0, "requires some form of AVX");
4987   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
4988 }
4989 
4990 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4991   _instruction_uses_vl = true;
4992   assert(UseAVX > 0, "requires some form of AVX");
4993   if (VM_Version::supports_evex()) {
4994     _tuple_type = EVEX_FV;
4995     _input_size_in_bits = EVEX_32bit;
4996   }
4997   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
4998 }
4999 
5000 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
5001   _instruction_uses_vl = true;
5002   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66);
  } else {
    emit_simd_arith(0xDF, dst, src, VEX_SIMD_66);
  }
5009 }
5010 
5011 void Assembler::por(XMMRegister dst, XMMRegister src) {
5012   _instruction_uses_vl = true;
5013   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5014   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
5015 }
5016 
5017 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5018   _instruction_uses_vl = true;
5019   assert(UseAVX > 0, "requires some form of AVX");
5020   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
5021 }
5022 
5023 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5024   _instruction_uses_vl = true;
5025   assert(UseAVX > 0, "requires some form of AVX");
5026   if (VM_Version::supports_evex()) {
5027     _tuple_type = EVEX_FV;
5028     _input_size_in_bits = EVEX_32bit;
5029   }
5030   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
5031 }
5032 
5033 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
5034   _instruction_uses_vl = true;
5035   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5036   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
5037 }
5038 
5039 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5040   _instruction_uses_vl = true;
5041   assert(UseAVX > 0, "requires some form of AVX");
5042   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
5043 }
5044 
5045 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5046   _instruction_uses_vl = true;
5047   assert(UseAVX > 0, "requires some form of AVX");
5048   if (VM_Version::supports_evex()) {
5049     _tuple_type = EVEX_FV;
5050     _input_size_in_bits = EVEX_32bit;
5051   }
5052   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
5053 }
5054 
5055 
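// 128-bit and 256-bit lane insert/extract. The trailing imm8 selects the lane;
// most helpers below hard-code the upper lane (0x01), while the 32x4 and 64x2
// variants take an explicit lane value.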
5056 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5057   assert(VM_Version::supports_avx(), "");
5058   int vector_len = AVX_256bit;
5059   if (VM_Version::supports_evex()) {
5060     vector_len = AVX_512bit;
5061   }
5062   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5063   emit_int8(0x18);
5064   emit_int8((unsigned char)(0xC0 | encode));
5065   // 0x00 - insert into lower 128 bits
5066   // 0x01 - insert into upper 128 bits
5067   emit_int8(0x01);
5068 }
5069 
5070 void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5071   assert(VM_Version::supports_evex(), "");
5072   int vector_len = AVX_512bit;
5073   int src_enc = src->encoding();
5074   int dst_enc = dst->encoding();
5075   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5076   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5077                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5078   emit_int8(0x1A);
5079   emit_int8((unsigned char)(0xC0 | encode));
5080   // 0x00 - insert into lower 256 bits
5081   // 0x01 - insert into upper 256 bits
5082   emit_int8(0x01);
5083 }
5084 
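// In the load forms below there is no separate nds operand; the destination
// encoding is passed for both the nds and reg slots of the prefix helper, hence
// the "swap src<->dst for encoding" note at those call sites.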
5085 void Assembler::vinsertf64x4h(XMMRegister dst, Address src) {
5086   assert(VM_Version::supports_evex(), "");
5087   _tuple_type = EVEX_T4;
5088   _input_size_in_bits = EVEX_64bit;
5089   InstructionMark im(this);
5090   int vector_len = AVX_512bit;
5091   assert(dst != xnoreg, "sanity");
5092   int dst_enc = dst->encoding();
5093   // swap src<->dst for encoding
5094   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ true, vector_len);
5095   emit_int8(0x1A);
5096   emit_operand(dst, src);
  // 0x01 - insert into upper 256 bits
5098   emit_int8(0x01);
5099 }
5100 
5101 void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
5102   assert(VM_Version::supports_evex(), "");
5103   int vector_len = AVX_512bit;
5104   int src_enc = src->encoding();
5105   int dst_enc = dst->encoding();
5106   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5107   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5108                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5109   emit_int8(0x18);
5110   emit_int8((unsigned char)(0xC0 | encode));
5111   // 0x00 - insert into q0 128 bits (0..127)
5112   // 0x01 - insert into q1 128 bits (128..255)
5113   // 0x02 - insert into q2 128 bits (256..383)
5114   // 0x03 - insert into q3 128 bits (384..511)
5115   emit_int8(value & 0x3);
5116 }
5117 
5118 void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) {
5119   assert(VM_Version::supports_evex(), "");
5120   _tuple_type = EVEX_T4;
5121   _input_size_in_bits = EVEX_32bit;
5122   InstructionMark im(this);
5123   int vector_len = AVX_512bit;
5124   assert(dst != xnoreg, "sanity");
5125   int dst_enc = dst->encoding();
5126   // swap src<->dst for encoding
5127   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5128   emit_int8(0x18);
5129   emit_operand(dst, src);
5130   // 0x00 - insert into q0 128 bits (0..127)
5131   // 0x01 - insert into q1 128 bits (128..255)
5132   // 0x02 - insert into q2 128 bits (256..383)
5133   // 0x03 - insert into q3 128 bits (384..511)
5134   emit_int8(value & 0x3);
5135 }
5136 
5137 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
5138   assert(VM_Version::supports_avx(), "");
5139   int vector_len = AVX_256bit;
5140   if (VM_Version::supports_evex()) {
5141     _tuple_type = EVEX_T4;
5142     _input_size_in_bits = EVEX_32bit;
5143     vector_len = AVX_512bit;
5144   }
5145   InstructionMark im(this);
5146   assert(dst != xnoreg, "sanity");
5147   int dst_enc = dst->encoding();
5148   // swap src<->dst for encoding
5149   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5150   emit_int8(0x18);
5151   emit_operand(dst, src);
5152   // 0x01 - insert into upper 128 bits
5153   emit_int8(0x01);
5154 }
5155 
5156 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
5157   assert(VM_Version::supports_avx(), "");
5158   int vector_len = AVX_256bit;
5159   if (VM_Version::supports_evex()) {
5160     vector_len = AVX_512bit;
5161   }
5162   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5163   emit_int8(0x19);
5164   emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from lower 128 bits
  // 0x01 - extract from upper 128 bits
5167   emit_int8(0x01);
5168 }
5169 
5170 void Assembler::vextractf128h(Address dst, XMMRegister src) {
5171   assert(VM_Version::supports_avx(), "");
5172   int vector_len = AVX_256bit;
5173   if (VM_Version::supports_evex()) {
5174     _tuple_type = EVEX_T4;
5175     _input_size_in_bits = EVEX_32bit;
5176     vector_len = AVX_512bit;
5177   }
5178   InstructionMark im(this);
5179   assert(src != xnoreg, "sanity");
5180   int src_enc = src->encoding();
5181   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5182   emit_int8(0x19);
5183   emit_operand(src, dst);
5184   // 0x01 - extract from upper 128 bits
5185   emit_int8(0x01);
5186 }
5187 
5188 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5189   assert(VM_Version::supports_avx2(), "");
5190   int vector_len = AVX_256bit;
5191   if (VM_Version::supports_evex()) {
5192     vector_len = AVX_512bit;
5193   }
5194   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5195   emit_int8(0x38);
5196   emit_int8((unsigned char)(0xC0 | encode));
5197   // 0x00 - insert into lower 128 bits
5198   // 0x01 - insert into upper 128 bits
5199   emit_int8(0x01);
5200 }
5201 
5202 void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
5203   assert(VM_Version::supports_evex(), "");
5204   int vector_len = AVX_512bit;
5205   int src_enc = src->encoding();
5206   int dst_enc = dst->encoding();
5207   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5208   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5210   emit_int8(0x38);
5211   emit_int8((unsigned char)(0xC0 | encode));
5212   // 0x00 - insert into lower 256 bits
5213   // 0x01 - insert into upper 256 bits
5214   emit_int8(0x01);
5215 }
5216 
5217 void Assembler::vinserti128h(XMMRegister dst, Address src) {
5218   assert(VM_Version::supports_avx2(), "");
5219   int vector_len = AVX_256bit;
5220   if (VM_Version::supports_evex()) {
5221     _tuple_type = EVEX_T4;
5222     _input_size_in_bits = EVEX_32bit;
5223     vector_len = AVX_512bit;
5224   }
5225   InstructionMark im(this);
5226   assert(dst != xnoreg, "sanity");
5227   int dst_enc = dst->encoding();
5228   // swap src<->dst for encoding
5229   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5230   emit_int8(0x38);
5231   emit_operand(dst, src);
5232   // 0x01 - insert into upper 128 bits
5233   emit_int8(0x01);
5234 }
5235 
5236 void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
5237   assert(VM_Version::supports_avx(), "");
5238   int vector_len = AVX_256bit;
5239   if (VM_Version::supports_evex()) {
5240     vector_len = AVX_512bit;
5241   }
5242   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
5243   emit_int8(0x39);
5244   emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from lower 128 bits
  // 0x01 - extract from upper 128 bits
5247   emit_int8(0x01);
5248 }
5249 
5250 void Assembler::vextracti128h(Address dst, XMMRegister src) {
5251   assert(VM_Version::supports_avx2(), "");
5252   int vector_len = AVX_256bit;
5253   if (VM_Version::supports_evex()) {
5254     _tuple_type = EVEX_T4;
5255     _input_size_in_bits = EVEX_32bit;
5256     vector_len = AVX_512bit;
5257   }
5258   InstructionMark im(this);
5259   assert(src != xnoreg, "sanity");
5260   int src_enc = src->encoding();
5261   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5262   emit_int8(0x39);
5263   emit_operand(src, dst);
5264   // 0x01 - extract from upper 128 bits
5265   emit_int8(0x01);
5266 }
5267 
5268 void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) {
5269   assert(VM_Version::supports_evex(), "");
5270   int vector_len = AVX_512bit;
5271   int src_enc = src->encoding();
5272   int dst_enc = dst->encoding();
5273   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5274                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5275   emit_int8(0x3B);
5276   emit_int8((unsigned char)(0xC0 | encode));
5277   // 0x01 - extract from upper 256 bits
5278   emit_int8(0x01);
5279 }
5280 
5281 void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
5282   assert(VM_Version::supports_evex(), "");
5283   int vector_len = AVX_512bit;
5284   int src_enc = src->encoding();
5285   int dst_enc = dst->encoding();
5286   int encode;
5287   if (VM_Version::supports_avx512dq()) {
5288     encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5289                                    /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5290   } else {
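    // Without AVX512DQ, fall back to the vextracti32x4 encoding (vex_w = 0),
    // which moves the same 128 bits.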
5291     encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5292                                    /* vex_w */ false, vector_len, /* legacy_mode */ true, /* no_mask_reg */ false);
5293   }
5294   emit_int8(0x39);
5295   emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
5299   emit_int8(value & 0x3);
5300 }
5301 
5302 void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) {
5303   assert(VM_Version::supports_evex(), "");
5304   int vector_len = AVX_512bit;
5305   int src_enc = src->encoding();
5306   int dst_enc = dst->encoding();
5307   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5308                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5309   emit_int8(0x1B);
5310   emit_int8((unsigned char)(0xC0 | encode));
5311   // 0x01 - extract from upper 256 bits
5312   emit_int8(0x01);
5313 }
5314 
5315 void Assembler::vextractf64x4h(Address dst, XMMRegister src) {
5316   assert(VM_Version::supports_evex(), "");
5317   _tuple_type = EVEX_T4;
5318   _input_size_in_bits = EVEX_64bit;
5319   InstructionMark im(this);
5320   int vector_len = AVX_512bit;
5321   assert(src != xnoreg, "sanity");
5322   int src_enc = src->encoding();
5323   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5324              /* vex_w */ true, vector_len);
5325   emit_int8(0x1B);
5326   emit_operand(src, dst);
5327   // 0x01 - extract from upper 256 bits
5328   emit_int8(0x01);
5329 }
5330 
5331 void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
5332   assert(VM_Version::supports_evex(), "");
5333   int vector_len = AVX_512bit;
5334   int src_enc = src->encoding();
5335   int dst_enc = dst->encoding();
5336   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5337                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5338   emit_int8(0x19);
5339   emit_int8((unsigned char)(0xC0 | encode));
5340   // 0x00 - extract from bits 127:0
5341   // 0x01 - extract from bits 255:128
5342   // 0x02 - extract from bits 383:256
5343   // 0x03 - extract from bits 511:384
5344   emit_int8(value & 0x3);
5345 }
5346 
5347 void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
5348   assert(VM_Version::supports_evex(), "");
5349   _tuple_type = EVEX_T4;
5350   _input_size_in_bits = EVEX_32bit;
5351   InstructionMark im(this);
5352   int vector_len = AVX_512bit;
5353   assert(src != xnoreg, "sanity");
5354   int src_enc = src->encoding();
5355   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
5356   emit_int8(0x19);
5357   emit_operand(src, dst);
5358   // 0x00 - extract from bits 127:0
5359   // 0x01 - extract from bits 255:128
5360   // 0x02 - extract from bits 383:256
5361   // 0x03 - extract from bits 511:384
5362   emit_int8(value & 0x3);
5363 }
5364 
5365 void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
5366   assert(VM_Version::supports_evex(), "");
5367   int vector_len = AVX_512bit;
5368   int src_enc = src->encoding();
5369   int dst_enc = dst->encoding();
5370   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5371                                      /* vex_w */ !_legacy_mode_dq, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5372   emit_int8(0x19);
5373   emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
5377   emit_int8(value & 0x3);
5378 }
5379 
// duplicate 4-byte integer data from src into 8 locations in dest
5381 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
5382   _instruction_uses_vl = true;
5383   assert(UseAVX > 1, "");
5384   int vector_len = AVX_256bit;
5385   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5386   emit_int8(0x58);
5387   emit_int8((unsigned char)(0xC0 | encode));
5388 }
5389 
// duplicate 2-byte integer data from src into 16 locations in dest
5391 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
5392   assert(VM_Version::supports_avx2(), "");
  int vector_len = AVX_256bit;
  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
                                     vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ false);
5396   emit_int8(0x79);
5397   emit_int8((unsigned char)(0xC0 | encode));
5398 }
5399 
// duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL
5401 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
5402   _instruction_uses_vl = true;
5403   assert(UseAVX > 1, "");
5404   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5405   emit_int8(0x78);
5406   emit_int8((unsigned char)(0xC0 | encode));
5407 }
5408 
5409 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
5410   _instruction_uses_vl = true;
5411   assert(UseAVX > 1, "");
5412   _tuple_type = EVEX_T1S;
5413   _input_size_in_bits = EVEX_8bit;
5414   InstructionMark im(this);
5415   assert(dst != xnoreg, "sanity");
5416   int dst_enc = dst->encoding();
5417   // swap src<->dst for encoding
5418   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5419   emit_int8(0x78);
5420   emit_operand(dst, src);
5421 }
5422 
// duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL
5424 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
5425   _instruction_uses_vl = true;
5426   assert(UseAVX > 1, "");
5427   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5428   emit_int8(0x79);
5429   emit_int8((unsigned char)(0xC0 | encode));
5430 }
5431 
5432 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
5433   _instruction_uses_vl = true;
5434   assert(UseAVX > 1, "");
5435   _tuple_type = EVEX_T1S;
5436   _input_size_in_bits = EVEX_16bit;
5437   InstructionMark im(this);
5438   assert(dst != xnoreg, "sanity");
5439   int dst_enc = dst->encoding();
5440   // swap src<->dst for encoding
5441   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5442   emit_int8(0x79);
5443   emit_operand(dst, src);
5444 }
5445 
5446 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
5447 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
5448   _instruction_uses_vl = true;
5449   assert(UseAVX > 1, "");
5450   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
5451   emit_int8(0x58);
5452   emit_int8((unsigned char)(0xC0 | encode));
5453 }
5454 
5455 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
5456   _instruction_uses_vl = true;
5457   assert(UseAVX > 1, "");
5458   _tuple_type = EVEX_T1S;
5459   _input_size_in_bits = EVEX_32bit;
5460   InstructionMark im(this);
5461   assert(dst != xnoreg, "sanity");
5462   int dst_enc = dst->encoding();
5463   // swap src<->dst for encoding
5464   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5465   emit_int8(0x58);
5466   emit_operand(dst, src);
5467 }
5468 
// duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL
5470 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
5471   _instruction_uses_vl = true;
5472   assert(UseAVX > 1, "");
5473   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5474                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
5475   emit_int8(0x59);
5476   emit_int8((unsigned char)(0xC0 | encode));
5477 }
5478 
5479 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
5480   _instruction_uses_vl = true;
5481   assert(UseAVX > 1, "");
5482   _tuple_type = EVEX_T1S;
5483   _input_size_in_bits = EVEX_64bit;
5484   InstructionMark im(this);
5485   assert(dst != xnoreg, "sanity");
5486   int dst_enc = dst->encoding();
5487   // swap src<->dst for encoding
5488   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
5489   emit_int8(0x59);
5490   emit_operand(dst, src);
5491 }
5492 
5493 // duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL
5494 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
5495   _instruction_uses_vl = true;
5496   assert(UseAVX > 1, "");
5497   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5498                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5499   emit_int8(0x18);
5500   emit_int8((unsigned char)(0xC0 | encode));
5501 }
5502 
5503 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
5504   assert(UseAVX > 1, "");
5505   _tuple_type = EVEX_T1S;
5506   _input_size_in_bits = EVEX_32bit;
5507   InstructionMark im(this);
5508   assert(dst != xnoreg, "sanity");
5509   int dst_enc = dst->encoding();
5510   // swap src<->dst for encoding
5511   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
5512   emit_int8(0x18);
5513   emit_operand(dst, src);
5514 }
5515 
5516 // duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL
5517 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
5518   _instruction_uses_vl = true;
5519   assert(UseAVX > 1, "");
5520   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5521                                      /*vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5522   emit_int8(0x19);
5523   emit_int8((unsigned char)(0xC0 | encode));
5524 }
5525 
5526 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
5527   _instruction_uses_vl = true;
5528   assert(UseAVX > 1, "");
5529   _tuple_type = EVEX_T1S;
5530   _input_size_in_bits = EVEX_64bit;
5531   InstructionMark im(this);
5532   assert(dst != xnoreg, "sanity");
5533   int dst_enc = dst->encoding();
5534   // swap src<->dst for encoding
5535   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
5536   emit_int8(0x19);
5537   emit_operand(dst, src);
5538 }
5539 
// duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL
5541 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
5542   _instruction_uses_vl = true;
5543   assert(VM_Version::supports_evex(), "");
5544   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5545                                      /*vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5546   emit_int8(0x7A);
5547   emit_int8((unsigned char)(0xC0 | encode));
5548 }
5549 
// duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL
5551 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
5552   _instruction_uses_vl = true;
5553   assert(VM_Version::supports_evex(), "");
5554   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5555                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5556   emit_int8(0x7B);
5557   emit_int8((unsigned char)(0xC0 | encode));
5558 }
5559 
5560 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
5561 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
5562   _instruction_uses_vl = true;
5563   assert(VM_Version::supports_evex(), "");
5564   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5565                                      /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5566   emit_int8(0x7C);
5567   emit_int8((unsigned char)(0xC0 | encode));
5568 }
5569 
// duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL
5571 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
5572   _instruction_uses_vl = true;
5573   assert(VM_Version::supports_evex(), "");
5574   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
5575                                      /* vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
5576   emit_int8(0x7C);
5577   emit_int8((unsigned char)(0xC0 | encode));
5578 }
5579 
5580 // Carry-Less Multiplication Quadword
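// In the imm8 mask, bit 0 selects the low or high qword of the first operand and
// bit 4 selects the low or high qword of the second operand.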
5581 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
5582   assert(VM_Version::supports_clmul(), "");
5583   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
5584                                       VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
5585   emit_int8(0x44);
5586   emit_int8((unsigned char)(0xC0 | encode));
5587   emit_int8((unsigned char)mask);
5588 }
5589 
5590 // Carry-Less Multiplication Quadword
5591 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
5592   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
5593   int vector_len = AVX_128bit;
5594   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* legacy_mode */ true);
5595   emit_int8(0x44);
5596   emit_int8((unsigned char)(0xC0 | encode));
5597   emit_int8((unsigned char)mask);
5598 }
5599 
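// vzeroupper clears the upper bits of the ymm registers to avoid the penalty for
// transitions between AVX and legacy SSE code; note that it is only emitted when
// UseAVX < 3, i.e. it is skipped on EVEX-capable targets.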
5600 void Assembler::vzeroupper() {
5601   assert(VM_Version::supports_avx(), "");
  if (UseAVX < 3) {
5604     (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
5605     emit_int8(0x77);
5606   }
5607 }
5608 
5609 
5610 #ifndef _LP64
5611 // 32bit only pieces of the assembler
5612 
5613 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
5614   // NO PREFIX AS NEVER 64BIT
5615   InstructionMark im(this);
5616   emit_int8((unsigned char)0x81);
5617   emit_int8((unsigned char)(0xF8 | src1->encoding()));
5618   emit_data(imm32, rspec, 0);
5619 }
5620 
5621 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
5623   InstructionMark im(this);
5624   emit_int8((unsigned char)0x81);
5625   emit_operand(rdi, src1);
5626   emit_data(imm32, rspec, 0);
5627 }
5628 
// The 8-byte cmpxchg (cmpxchg8b, used on 32-bit platforms) compares the value at adr
// with the contents of rdx:rax and, if they are equal, stores rcx:rbx into adr;
// otherwise, the value at adr is loaded into rdx:rax.  The ZF is set if the compared
// values were equal, and cleared otherwise.
5632 void Assembler::cmpxchg8(Address adr) {
5633   InstructionMark im(this);
5634   emit_int8(0x0F);
5635   emit_int8((unsigned char)0xC7);
5636   emit_operand(rcx, adr);
5637 }
5638 
5639 void Assembler::decl(Register dst) {
5640   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_int8(0x48 | dst->encoding());
5642 }
5643 
5644 #endif // _LP64
5645 
// 64-bit code typically doesn't use the x87, but needs it for the trig functions
5647 
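// emit_farith(b1, b2, i) emits the two-byte x87 register form: b1 followed by
// (b2 + i), where i indexes the floating-point stack register ST(i).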
5648 void Assembler::fabs() {
5649   emit_int8((unsigned char)0xD9);
5650   emit_int8((unsigned char)0xE1);
5651 }
5652 
5653 void Assembler::fadd(int i) {
5654   emit_farith(0xD8, 0xC0, i);
5655 }
5656 
5657 void Assembler::fadd_d(Address src) {
5658   InstructionMark im(this);
5659   emit_int8((unsigned char)0xDC);
5660   emit_operand32(rax, src);
5661 }
5662 
5663 void Assembler::fadd_s(Address src) {
5664   InstructionMark im(this);
5665   emit_int8((unsigned char)0xD8);
5666   emit_operand32(rax, src);
5667 }
5668 
5669 void Assembler::fadda(int i) {
5670   emit_farith(0xDC, 0xC0, i);
5671 }
5672 
5673 void Assembler::faddp(int i) {
5674   emit_farith(0xDE, 0xC0, i);
5675 }
5676 
5677 void Assembler::fchs() {
5678   emit_int8((unsigned char)0xD9);
5679   emit_int8((unsigned char)0xE0);
5680 }
5681 
5682 void Assembler::fcom(int i) {
5683   emit_farith(0xD8, 0xD0, i);
5684 }
5685 
5686 void Assembler::fcomp(int i) {
5687   emit_farith(0xD8, 0xD8, i);
5688 }
5689 
5690 void Assembler::fcomp_d(Address src) {
5691   InstructionMark im(this);
5692   emit_int8((unsigned char)0xDC);
5693   emit_operand32(rbx, src);
5694 }
5695 
5696 void Assembler::fcomp_s(Address src) {
5697   InstructionMark im(this);
5698   emit_int8((unsigned char)0xD8);
5699   emit_operand32(rbx, src);
5700 }
5701 
5702 void Assembler::fcompp() {
5703   emit_int8((unsigned char)0xDE);
5704   emit_int8((unsigned char)0xD9);
5705 }
5706 
5707 void Assembler::fcos() {
5708   emit_int8((unsigned char)0xD9);
5709   emit_int8((unsigned char)0xFF);
5710 }
5711 
5712 void Assembler::fdecstp() {
5713   emit_int8((unsigned char)0xD9);
5714   emit_int8((unsigned char)0xF6);
5715 }
5716 
5717 void Assembler::fdiv(int i) {
5718   emit_farith(0xD8, 0xF0, i);
5719 }
5720 
5721 void Assembler::fdiv_d(Address src) {
5722   InstructionMark im(this);
5723   emit_int8((unsigned char)0xDC);
5724   emit_operand32(rsi, src);
5725 }
5726 
5727 void Assembler::fdiv_s(Address src) {
5728   InstructionMark im(this);
5729   emit_int8((unsigned char)0xD8);
5730   emit_operand32(rsi, src);
5731 }
5732 
5733 void Assembler::fdiva(int i) {
5734   emit_farith(0xDC, 0xF8, i);
5735 }
5736 
5737 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
5738 //       is erroneous for some of the floating-point instructions below.
5739 
5740 void Assembler::fdivp(int i) {
5741   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
5742 }
5743 
5744 void Assembler::fdivr(int i) {
5745   emit_farith(0xD8, 0xF8, i);
5746 }
5747 
5748 void Assembler::fdivr_d(Address src) {
5749   InstructionMark im(this);
5750   emit_int8((unsigned char)0xDC);
5751   emit_operand32(rdi, src);
5752 }
5753 
5754 void Assembler::fdivr_s(Address src) {
5755   InstructionMark im(this);
5756   emit_int8((unsigned char)0xD8);
5757   emit_operand32(rdi, src);
5758 }
5759 
5760 void Assembler::fdivra(int i) {
5761   emit_farith(0xDC, 0xF0, i);
5762 }
5763 
5764 void Assembler::fdivrp(int i) {
5765   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
5766 }
5767 
5768 void Assembler::ffree(int i) {
5769   emit_farith(0xDD, 0xC0, i);
5770 }
5771 
5772 void Assembler::fild_d(Address adr) {
5773   InstructionMark im(this);
5774   emit_int8((unsigned char)0xDF);
5775   emit_operand32(rbp, adr);
5776 }
5777 
5778 void Assembler::fild_s(Address adr) {
5779   InstructionMark im(this);
5780   emit_int8((unsigned char)0xDB);
5781   emit_operand32(rax, adr);
5782 }
5783 
5784 void Assembler::fincstp() {
5785   emit_int8((unsigned char)0xD9);
5786   emit_int8((unsigned char)0xF7);
5787 }
5788 
5789 void Assembler::finit() {
5790   emit_int8((unsigned char)0x9B);
5791   emit_int8((unsigned char)0xDB);
5792   emit_int8((unsigned char)0xE3);
5793 }
5794 
5795 void Assembler::fist_s(Address adr) {
5796   InstructionMark im(this);
5797   emit_int8((unsigned char)0xDB);
5798   emit_operand32(rdx, adr);
5799 }
5800 
5801 void Assembler::fistp_d(Address adr) {
5802   InstructionMark im(this);
5803   emit_int8((unsigned char)0xDF);
5804   emit_operand32(rdi, adr);
5805 }
5806 
5807 void Assembler::fistp_s(Address adr) {
5808   InstructionMark im(this);
5809   emit_int8((unsigned char)0xDB);
5810   emit_operand32(rbx, adr);
5811 }
5812 
5813 void Assembler::fld1() {
5814   emit_int8((unsigned char)0xD9);
5815   emit_int8((unsigned char)0xE8);
5816 }
5817 
5818 void Assembler::fld_d(Address adr) {
5819   InstructionMark im(this);
5820   emit_int8((unsigned char)0xDD);
5821   emit_operand32(rax, adr);
5822 }
5823 
5824 void Assembler::fld_s(Address adr) {
5825   InstructionMark im(this);
5826   emit_int8((unsigned char)0xD9);
5827   emit_operand32(rax, adr);
5828 }
5829 
5830 
5831 void Assembler::fld_s(int index) {
5832   emit_farith(0xD9, 0xC0, index);
5833 }
5834 
5835 void Assembler::fld_x(Address adr) {
5836   InstructionMark im(this);
5837   emit_int8((unsigned char)0xDB);
5838   emit_operand32(rbp, adr);
5839 }
5840 
5841 void Assembler::fldcw(Address src) {
5842   InstructionMark im(this);
5843   emit_int8((unsigned char)0xD9);
5844   emit_operand32(rbp, src);
5845 }
5846 
5847 void Assembler::fldenv(Address src) {
5848   InstructionMark im(this);
5849   emit_int8((unsigned char)0xD9);
5850   emit_operand32(rsp, src);
5851 }
5852 
5853 void Assembler::fldlg2() {
5854   emit_int8((unsigned char)0xD9);
5855   emit_int8((unsigned char)0xEC);
5856 }
5857 
5858 void Assembler::fldln2() {
5859   emit_int8((unsigned char)0xD9);
5860   emit_int8((unsigned char)0xED);
5861 }
5862 
5863 void Assembler::fldz() {
5864   emit_int8((unsigned char)0xD9);
5865   emit_int8((unsigned char)0xEE);
5866 }
5867 
5868 void Assembler::flog() {
5869   fldln2();
5870   fxch();
5871   fyl2x();
5872 }
5873 
5874 void Assembler::flog10() {
5875   fldlg2();
5876   fxch();
5877   fyl2x();
5878 }
5879 
5880 void Assembler::fmul(int i) {
5881   emit_farith(0xD8, 0xC8, i);
5882 }
5883 
5884 void Assembler::fmul_d(Address src) {
5885   InstructionMark im(this);
5886   emit_int8((unsigned char)0xDC);
5887   emit_operand32(rcx, src);
5888 }
5889 
5890 void Assembler::fmul_s(Address src) {
5891   InstructionMark im(this);
5892   emit_int8((unsigned char)0xD8);
5893   emit_operand32(rcx, src);
5894 }
5895 
5896 void Assembler::fmula(int i) {
5897   emit_farith(0xDC, 0xC8, i);
5898 }
5899 
5900 void Assembler::fmulp(int i) {
5901   emit_farith(0xDE, 0xC8, i);
5902 }
5903 
5904 void Assembler::fnsave(Address dst) {
5905   InstructionMark im(this);
5906   emit_int8((unsigned char)0xDD);
5907   emit_operand32(rsi, dst);
5908 }
5909 
5910 void Assembler::fnstcw(Address src) {
5911   InstructionMark im(this);
5912   emit_int8((unsigned char)0x9B);
5913   emit_int8((unsigned char)0xD9);
5914   emit_operand32(rdi, src);
5915 }
5916 
5917 void Assembler::fnstsw_ax() {
5918   emit_int8((unsigned char)0xDF);
5919   emit_int8((unsigned char)0xE0);
5920 }
5921 
5922 void Assembler::fprem() {
5923   emit_int8((unsigned char)0xD9);
5924   emit_int8((unsigned char)0xF8);
5925 }
5926 
5927 void Assembler::fprem1() {
5928   emit_int8((unsigned char)0xD9);
5929   emit_int8((unsigned char)0xF5);
5930 }
5931 
5932 void Assembler::frstor(Address src) {
5933   InstructionMark im(this);
5934   emit_int8((unsigned char)0xDD);
5935   emit_operand32(rsp, src);
5936 }
5937 
5938 void Assembler::fsin() {
5939   emit_int8((unsigned char)0xD9);
5940   emit_int8((unsigned char)0xFE);
5941 }
5942 
5943 void Assembler::fsqrt() {
5944   emit_int8((unsigned char)0xD9);
5945   emit_int8((unsigned char)0xFA);
5946 }
5947 
5948 void Assembler::fst_d(Address adr) {
5949   InstructionMark im(this);
5950   emit_int8((unsigned char)0xDD);
5951   emit_operand32(rdx, adr);
5952 }
5953 
5954 void Assembler::fst_s(Address adr) {
5955   InstructionMark im(this);
5956   emit_int8((unsigned char)0xD9);
5957   emit_operand32(rdx, adr);
5958 }
5959 
5960 void Assembler::fstp_d(Address adr) {
5961   InstructionMark im(this);
5962   emit_int8((unsigned char)0xDD);
5963   emit_operand32(rbx, adr);
5964 }
5965 
5966 void Assembler::fstp_d(int index) {
5967   emit_farith(0xDD, 0xD8, index);
5968 }
5969 
5970 void Assembler::fstp_s(Address adr) {
5971   InstructionMark im(this);
5972   emit_int8((unsigned char)0xD9);
5973   emit_operand32(rbx, adr);
5974 }
5975 
5976 void Assembler::fstp_x(Address adr) {
5977   InstructionMark im(this);
5978   emit_int8((unsigned char)0xDB);
5979   emit_operand32(rdi, adr);
5980 }
5981 
5982 void Assembler::fsub(int i) {
5983   emit_farith(0xD8, 0xE0, i);
5984 }
5985 
5986 void Assembler::fsub_d(Address src) {
5987   InstructionMark im(this);
5988   emit_int8((unsigned char)0xDC);
5989   emit_operand32(rsp, src);
5990 }
5991 
5992 void Assembler::fsub_s(Address src) {
5993   InstructionMark im(this);
5994   emit_int8((unsigned char)0xD8);
5995   emit_operand32(rsp, src);
5996 }
5997 
5998 void Assembler::fsuba(int i) {
5999   emit_farith(0xDC, 0xE8, i);
6000 }
6001 
6002 void Assembler::fsubp(int i) {
6003   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
6004 }
6005 
6006 void Assembler::fsubr(int i) {
6007   emit_farith(0xD8, 0xE8, i);
6008 }
6009 
6010 void Assembler::fsubr_d(Address src) {
6011   InstructionMark im(this);
6012   emit_int8((unsigned char)0xDC);
6013   emit_operand32(rbp, src);
6014 }
6015 
6016 void Assembler::fsubr_s(Address src) {
6017   InstructionMark im(this);
6018   emit_int8((unsigned char)0xD8);
6019   emit_operand32(rbp, src);
6020 }
6021 
6022 void Assembler::fsubra(int i) {
6023   emit_farith(0xDC, 0xE0, i);
6024 }
6025 
6026 void Assembler::fsubrp(int i) {
6027   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
6028 }
6029 
6030 void Assembler::ftan() {
6031   emit_int8((unsigned char)0xD9);
6032   emit_int8((unsigned char)0xF2);
6033   emit_int8((unsigned char)0xDD);
6034   emit_int8((unsigned char)0xD8);
6035 }
6036 
6037 void Assembler::ftst() {
6038   emit_int8((unsigned char)0xD9);
6039   emit_int8((unsigned char)0xE4);
6040 }
6041 
6042 void Assembler::fucomi(int i) {
6043   // make sure the instruction is supported (introduced for P6, together with cmov)
6044   guarantee(VM_Version::supports_cmov(), "illegal instruction");
6045   emit_farith(0xDB, 0xE8, i);
6046 }
6047 
6048 void Assembler::fucomip(int i) {
6049   // make sure the instruction is supported (introduced for P6, together with cmov)
6050   guarantee(VM_Version::supports_cmov(), "illegal instruction");
6051   emit_farith(0xDF, 0xE8, i);
6052 }
6053 
6054 void Assembler::fwait() {
6055   emit_int8((unsigned char)0x9B);
6056 }
6057 
6058 void Assembler::fxch(int i) {
6059   emit_farith(0xD9, 0xC8, i);
6060 }
6061 
6062 void Assembler::fyl2x() {
6063   emit_int8((unsigned char)0xD9);
6064   emit_int8((unsigned char)0xF1);
6065 }
6066 
6067 void Assembler::frndint() {
6068   emit_int8((unsigned char)0xD9);
6069   emit_int8((unsigned char)0xFC);
6070 }
6071 
6072 void Assembler::f2xm1() {
6073   emit_int8((unsigned char)0xD9);
6074   emit_int8((unsigned char)0xF0);
6075 }
6076 
6077 void Assembler::fldl2e() {
6078   emit_int8((unsigned char)0xD9);
6079   emit_int8((unsigned char)0xEA);
6080 }
6081 
6082 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
6083 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
6084 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
6085 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
6086 
6087 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
6088 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
6089   if (pre > 0) {
6090     emit_int8(simd_pre[pre]);
6091   }
6092   if (rex_w) {
6093     prefixq(adr, xreg);
6094   } else {
6095     prefix(adr, xreg);
6096   }
6097   if (opc > 0) {
6098     emit_int8(0x0F);
6099     int opc2 = simd_opc[opc];
6100     if (opc2 > 0) {
6101       emit_int8(opc2);
6102     }
6103   }
6104 }
6105 
6106 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
6107   if (pre > 0) {
6108     emit_int8(simd_pre[pre]);
6109   }
6110   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
6111                           prefix_and_encode(dst_enc, src_enc);
6112   if (opc > 0) {
6113     emit_int8(0x0F);
6114     int opc2 = simd_opc[opc];
6115     if (opc2 > 0) {
6116       emit_int8(opc2);
6117     }
6118   }
6119   return encode;
6120 }
6121 
6122 
6123 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) {
6124   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
6125     prefix(VEX_3bytes);
6126 
6127     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
6128     byte1 = (~byte1) & 0xE0;
6129     byte1 |= opc;
6130     emit_int8(byte1);
6131 
6132     int byte2 = ((~nds_enc) & 0xf) << 3;
6133     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
6134     emit_int8(byte2);
6135   } else {
6136     prefix(VEX_2bytes);
6137 
6138     int byte1 = vex_r ? VEX_R : 0;
6139     byte1 = (~byte1) & 0x80;
6140     byte1 |= ((~nds_enc) & 0xf) << 3;
6141     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
6142     emit_int8(byte1);
6143   }
6144 }
6145 
// This is a 4-byte prefix encoding: 0x62 followed by P0, P1 and P2
6147 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
6148                             int nds_enc, VexSimdPrefix pre, VexOpcode opc,
6149                             bool is_extended_context, bool is_merge_context,
                            int vector_len, bool no_mask_reg) {
6151   // EVEX 0x62 prefix
6152   prefix(EVEX_4bytes);
6153   _evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0);
6154 
  // P0: byte 2, laid out as RXBR`00mm; the R, X, B and R` bits are gathered
  // in positive form here and then inverted, as the encoding stores them not'd
6157   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
6158   byte2 = (~byte2) & 0xF0;
  // the mm bits select the opcode map, one of {0F, 0F_38, 0F_3A},
  // confined to the low two bits
6161   byte2 |= opc;
6162   emit_int8(byte2);
6163 
6164   // P1: byte 3 as Wvvvv1pp
6165   int byte3 = ((~nds_enc) & 0xf) << 3;
6166   // p[10] is always 1
6167   byte3 |= EVEX_F;
6168   byte3 |= (vex_w & 1) << 7;
  // the pp bits select the SIMD prefix, one of {none, 66, F3, F2},
  // confined to the low two bits
6171   byte3 |= pre;
6172   emit_int8(byte3);
6173 
  // P2: byte 4 as zL'Lbv'aaa
  // the aaa field in the low 3 bits selects the opmask register
  // (k1 is hard coded for now, it will be initialized properly later)
  int byte4 = (no_mask_reg) ? 0 : 1;
  // EVEX.v` for extending EVEX.vvvv or VIDX
  byte4 |= (evex_v ? 0 : EVEX_V);
  // third is EVEX.b for broadcast actions
  byte4 |= (is_extended_context ? EVEX_Rb : 0);
  // fourth is EVEX.L'L for vector length: 0 is 128-bit, 1 is 256-bit, 2 is 512-bit; 1024-bit is currently not supported
  byte4 |= ((vector_len) & 0x3) << 5;
  // last is EVEX.z for zero/merge actions
  byte4 |= (is_merge_context ? EVEX_Z : 0);
6184   emit_int8(byte4);
6185 }
6186 
6187 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
6188                            VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
  bool vex_r = ((xreg_enc & 8) == 8);
6190   bool vex_b = adr.base_needs_rex();
6191   bool vex_x = adr.index_needs_rex();
6192   _avx_vector_len = vector_len;
6193 
  // if AVX-512 vector-length (VL) support is turned off, revert to AVX for vectors smaller than 512-bit
6195   if (_legacy_mode_vl && _instruction_uses_vl) {
6196     switch (vector_len) {
6197     case AVX_128bit:
6198     case AVX_256bit:
6199       legacy_mode = true;
6200       break;
6201     }
6202   }
6203 
  if ((UseAVX > 2) && (legacy_mode == false)) {
6206     bool evex_r = (xreg_enc >= 16);
6207     bool evex_v = (nds_enc >= 16);
6208     _is_evex_instruction = true;
6209     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
6210   } else {
6211     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
6212   }
6213   _instruction_uses_vl = false;
6214 }
6215 
6216 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
6217                                      bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) {
  bool vex_r = ((dst_enc & 8) == 8);
  bool vex_b = ((src_enc & 8) == 8);
6220   bool vex_x = false;
6221   _avx_vector_len = vector_len;
6222 
  // if AVX-512 vector-length (VL) support is turned off, revert to AVX for vectors smaller than 512-bit
6224   if (_legacy_mode_vl && _instruction_uses_vl) {
6225     switch (vector_len) {
6226     case AVX_128bit:
6227     case AVX_256bit:
6228       legacy_mode = true;
6229       break;
6230     }
6231   }
6232 
  if ((UseAVX > 2) && (legacy_mode == false)) {
6235     bool evex_r = (dst_enc >= 16);
6236     bool evex_v = (nds_enc >= 16);
6237     // can use vex_x as bank extender on rm encoding
6238     vex_x = (src_enc >= 16);
6239     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
6240   } else {
6241     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
6242   }
6243 
6244   _instruction_uses_vl = false;
6245 
6246   // return modrm byte components for operands
6247   return (((dst_enc & 7) << 3) | (src_enc & 7));
6248 }
6249 
6250 
6251 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
6252                             bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
6253   if (UseAVX > 0) {
6254     int xreg_enc = xreg->encoding();
6255     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
6256     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
6257   } else {
6258     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
6259     rex_prefix(adr, xreg, pre, opc, rex_w);
6260   }
6261 }
6262 
6263 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
6264                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
6265   int dst_enc = dst->encoding();
6266   int src_enc = src->encoding();
6267   if (UseAVX > 0) {
6268     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6269     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
6270   } else {
6271     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
6272     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
6273   }
6274 }
6275 
6276 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
6277                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
6278   int dst_enc = dst->encoding();
6279   int src_enc = src->encoding();
6280   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6281   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
6282 }
6283 
6284 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
6285                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
6286   int dst_enc = dst->encoding();
6287   int src_enc = src->encoding();
6288   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6289   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
6290 }
6291 
6292 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
6293   InstructionMark im(this);
6294   simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
6295   emit_int8(opcode);
6296   emit_operand(dst, src);
6297 }
6298 
6299 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
6300   InstructionMark im(this);
6301   simd_prefix_q(dst, dst, src, pre, no_mask_reg);
6302   emit_int8(opcode);
6303   emit_operand(dst, src);
6304 }
6305 
6306 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
6307   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
6308   emit_int8(opcode);
6309   emit_int8((unsigned char)(0xC0 | encode));
6310 }
6311 
6312 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
6313   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
6314   emit_int8(opcode);
6315   emit_int8((unsigned char)(0xC0 | encode));
6316 }
6317 
6318 // Versions with no second source register (non-destructive source).
6319 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
6320   InstructionMark im(this);
6321   simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
6322   emit_int8(opcode);
6323   emit_operand(dst, src);
6324 }
6325 
6326 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
6327   InstructionMark im(this);
6328   simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
6329   emit_int8(opcode);
6330   emit_operand(dst, src);
6331 }
6332 
6333 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
6334   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
6335   emit_int8(opcode);
6336   emit_int8((unsigned char)(0xC0 | encode));
6337 }
6338 
6339 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
6340   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true);
6341   emit_int8(opcode);
6342   emit_int8((unsigned char)(0xC0 | encode));
6343 }
6344 
6345 // 3-operands AVX instructions
6346 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src,
6347                                VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
6348   InstructionMark im(this);
6349   vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode);
6350   emit_int8(opcode);
6351   emit_operand(dst, src);
6352 }
6353 
6354 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
6355                                  Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
6356   InstructionMark im(this);
6357   vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
6358   emit_int8(opcode);
6359   emit_operand(dst, src);
6360 }
6361 
6362 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
6363                                VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
6364   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, legacy_mode, no_mask_reg);
6365   emit_int8(opcode);
6366   emit_int8((unsigned char)(0xC0 | encode));
6367 }
6368 
6369 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
6370                                  VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
6371   int src_enc = src->encoding();
6372   int dst_enc = dst->encoding();
6373   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6374   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
6375   emit_int8(opcode);
6376   emit_int8((unsigned char)(0xC0 | encode));
6377 }
6378 
6379 #ifndef _LP64
6380 
6381 void Assembler::incl(Register dst) {
6382   // Don't use it directly. Use MacroAssembler::incrementl() instead.
6383   emit_int8(0x40 | dst->encoding());
6384 }
6385 
6386 void Assembler::lea(Register dst, Address src) {
6387   leal(dst, src);
6388 }
6389 
6390 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
6391   InstructionMark im(this);
6392   emit_int8((unsigned char)0xC7);
6393   emit_operand(rax, dst);
6394   emit_data((int)imm32, rspec, 0);
6395 }
6396 
6397 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
6398   InstructionMark im(this);
6399   int encode = prefix_and_encode(dst->encoding());
6400   emit_int8((unsigned char)(0xB8 | encode));
6401   emit_data((int)imm32, rspec, 0);
6402 }
6403 
6404 void Assembler::popa() { // 32bit
6405   emit_int8(0x61);
6406 }
6407 
6408 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
6409   InstructionMark im(this);
6410   emit_int8(0x68);
6411   emit_data(imm32, rspec, 0);
6412 }
6413 
6414 void Assembler::pusha() { // 32bit
6415   emit_int8(0x60);
6416 }
6417 
6418 void Assembler::set_byte_if_not_zero(Register dst) {
6419   emit_int8(0x0F);
6420   emit_int8((unsigned char)0x95);
6421   emit_int8((unsigned char)(0xE0 | dst->encoding()));
6422 }
6423 
6424 void Assembler::shldl(Register dst, Register src) {
6425   emit_int8(0x0F);
6426   emit_int8((unsigned char)0xA5);
6427   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6428 }
6429 
6430 // 0F A4 / r ib
6431 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
6432   emit_int8(0x0F);
6433   emit_int8((unsigned char)0xA4);
6434   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6435   emit_int8(imm8);
6436 }
6437 
6438 void Assembler::shrdl(Register dst, Register src) {
6439   emit_int8(0x0F);
6440   emit_int8((unsigned char)0xAD);
6441   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6442 }
6443 
6444 #else // LP64
6445 
6446 void Assembler::set_byte_if_not_zero(Register dst) {
6447   int enc = prefix_and_encode(dst->encoding(), true);
6448   emit_int8(0x0F);
6449   emit_int8((unsigned char)0x95);
6450   emit_int8((unsigned char)(0xE0 | enc));
6451 }
6452 
// 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// addressing; it cannot be used by instructions that want an immediate value.
6456 
6457 bool Assembler::reachable(AddressLiteral adr) {
6458   int64_t disp;
  // relocInfo::none will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it
  // will always be reachable.
6462   if (adr.reloc() == relocInfo::none) {
6463     return false;
6464   }
6465   if (adr.reloc() == relocInfo::internal_word_type) {
6466     // This should be rip relative and easily reachable.
6467     return true;
6468   }
6469   if (adr.reloc() == relocInfo::virtual_call_type ||
6470       adr.reloc() == relocInfo::opt_virtual_call_type ||
6471       adr.reloc() == relocInfo::static_call_type ||
6472       adr.reloc() == relocInfo::static_stub_type ) {
6473     // This should be rip relative within the code cache and easily
6474     // reachable until we get huge code caches. (At which point
6475     // ic code is going to have issues).
6476     return true;
6477   }
6478   if (adr.reloc() != relocInfo::external_word_type &&
6479       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
6480       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
6481       adr.reloc() != relocInfo::runtime_call_type ) {
6482     return false;
6483   }
6484 
  // Stress the correction code
  if (ForceUnreachable) {
    // Must be a runtime_call reloc; see if it is in the code cache.
    // Flipping things that are already in the code cache to be unreachable causes
    // issues with things like inline caches where the additional instructions
    // are not handled.
6491     if (CodeCache::find_blob(adr._target) == NULL) {
6492       return false;
6493     }
6494   }
6495   // For external_word_type/runtime_call_type if it is reachable from where we
6496   // are now (possibly a temp buffer) and where we might end up
6497   // anywhere in the codeCache then we are always reachable.
6498   // This would have to change if we ever save/restore shared code
6499   // to be more pessimistic.
6500   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
6501   if (!is_simm32(disp)) return false;
6502   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
6503   if (!is_simm32(disp)) return false;
6504 
6505   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
6506 
  // Because a rip-relative reference is disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction, we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction this displacement
  // gets placed into. We don't have to fudge the checks above because they are
  // already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, a 4-byte literal;
  // + 4 because better safe than sorry.
6514   const int fudge = 12 + 4;
6515   if (disp < 0) {
6516     disp -= fudge;
6517   } else {
6518     disp += fudge;
6519   }
6520   return is_simm32(disp);
6521 }
6522 
6523 // Check if the polling page is not reachable from the code cache using rip-relative
6524 // addressing.
6525 bool Assembler::is_polling_page_far() {
6526   intptr_t addr = (intptr_t)os::get_polling_page();
6527   return ForceUnreachable ||
6528          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
6529          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
6530 }
6531 
6532 void Assembler::emit_data64(jlong data,
6533                             relocInfo::relocType rtype,
6534                             int format) {
6535   if (rtype == relocInfo::none) {
6536     emit_int64(data);
6537   } else {
6538     emit_data64(data, Relocation::spec_simple(rtype), format);
6539   }
6540 }
6541 
6542 void Assembler::emit_data64(jlong data,
6543                             RelocationHolder const& rspec,
6544                             int format) {
6545   assert(imm_operand == 0, "default format must be immediate in this file");
6546   assert(imm_operand == format, "must be immediate");
6547   assert(inst_mark() != NULL, "must be inside InstructionMark");
6548   // Do not use AbstractAssembler::relocate, which is not intended for
6549   // embedded words.  Instead, relocate to the enclosing instruction.
6550   code_section()->relocate(inst_mark(), rspec, format);
6551 #ifdef ASSERT
6552   check_relocation(rspec, format);
6553 #endif
6554   emit_int64(data);
6555 }
6556 
6557 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
6558   if (reg_enc >= 8) {
6559     prefix(REX_B);
6560     reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    // a REX prefix (even with no extension bits set) selects spl/bpl/sil/dil
    // rather than ah/ch/dh/bh for byte-register encodings 4..7
    prefix(REX);
  }
6564   return reg_enc;
6565 }
6566 
6567 int Assembler::prefixq_and_encode(int reg_enc) {
6568   if (reg_enc < 8) {
6569     prefix(REX_W);
6570   } else {
6571     prefix(REX_WB);
6572     reg_enc -= 8;
6573   }
6574   return reg_enc;
6575 }
6576 
6577 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
6578   if (dst_enc < 8) {
6579     if (src_enc >= 8) {
6580       prefix(REX_B);
6581       src_enc -= 8;
6582     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
6583       prefix(REX);
6584     }
6585   } else {
6586     if (src_enc < 8) {
6587       prefix(REX_R);
6588     } else {
6589       prefix(REX_RB);
6590       src_enc -= 8;
6591     }
6592     dst_enc -= 8;
6593   }
6594   return dst_enc << 3 | src_enc;
6595 }
6596 
6597 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
6598   if (dst_enc < 8) {
6599     if (src_enc < 8) {
6600       prefix(REX_W);
6601     } else {
6602       prefix(REX_WB);
6603       src_enc -= 8;
6604     }
6605   } else {
6606     if (src_enc < 8) {
6607       prefix(REX_WR);
6608     } else {
6609       prefix(REX_WRB);
6610       src_enc -= 8;
6611     }
6612     dst_enc -= 8;
6613   }
6614   return dst_enc << 3 | src_enc;
6615 }
6616 
6617 void Assembler::prefix(Register reg) {
6618   if (reg->encoding() >= 8) {
6619     prefix(REX_B);
6620   }
6621 }
6622 
6623 void Assembler::prefix(Register dst, Register src, Prefix p) {
6624   if (src->encoding() >= 8) {
6625     p = (Prefix)(p | REX_B);
6626   }
6627   if (dst->encoding() >= 8) {
6628     p = (Prefix)( p | REX_R);
6629   }
6630   if (p != Prefix_EMPTY) {
6631     // do not generate an empty prefix
6632     prefix(p);
6633   }
6634 }
6635 
6636 void Assembler::prefix(Register dst, Address adr, Prefix p) {
6637   if (adr.base_needs_rex()) {
6638     if (adr.index_needs_rex()) {
6639       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
6640     } else {
6641       prefix(REX_B);
6642     }
6643   } else {
6644     if (adr.index_needs_rex()) {
6645       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
6646     }
6647   }
6648   if (dst->encoding() >= 8) {
6649     p = (Prefix)(p | REX_R);
6650   }
6651   if (p != Prefix_EMPTY) {
6652     // do not generate an empty prefix
6653     prefix(p);
6654   }
6655 }
6656 
6657 void Assembler::prefix(Address adr) {
6658   if (adr.base_needs_rex()) {
6659     if (adr.index_needs_rex()) {
6660       prefix(REX_XB);
6661     } else {
6662       prefix(REX_B);
6663     }
6664   } else {
6665     if (adr.index_needs_rex()) {
6666       prefix(REX_X);
6667     }
6668   }
6669 }
6670 
6671 void Assembler::prefixq(Address adr) {
6672   if (adr.base_needs_rex()) {
6673     if (adr.index_needs_rex()) {
6674       prefix(REX_WXB);
6675     } else {
6676       prefix(REX_WB);
6677     }
6678   } else {
6679     if (adr.index_needs_rex()) {
6680       prefix(REX_WX);
6681     } else {
6682       prefix(REX_W);
6683     }
6684   }
6685 }
6686 
6687 
6688 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
6689   if (reg->encoding() < 8) {
6690     if (adr.base_needs_rex()) {
6691       if (adr.index_needs_rex()) {
6692         prefix(REX_XB);
6693       } else {
6694         prefix(REX_B);
6695       }
6696     } else {
6697       if (adr.index_needs_rex()) {
6698         prefix(REX_X);
6699       } else if (byteinst && reg->encoding() >= 4 ) {
6700         prefix(REX);
6701       }
6702     }
6703   } else {
6704     if (adr.base_needs_rex()) {
6705       if (adr.index_needs_rex()) {
6706         prefix(REX_RXB);
6707       } else {
6708         prefix(REX_RB);
6709       }
6710     } else {
6711       if (adr.index_needs_rex()) {
6712         prefix(REX_RX);
6713       } else {
6714         prefix(REX_R);
6715       }
6716     }
6717   }
6718 }
6719 
6720 void Assembler::prefixq(Address adr, Register src) {
6721   if (src->encoding() < 8) {
6722     if (adr.base_needs_rex()) {
6723       if (adr.index_needs_rex()) {
6724         prefix(REX_WXB);
6725       } else {
6726         prefix(REX_WB);
6727       }
6728     } else {
6729       if (adr.index_needs_rex()) {
6730         prefix(REX_WX);
6731       } else {
6732         prefix(REX_W);
6733       }
6734     }
6735   } else {
6736     if (adr.base_needs_rex()) {
6737       if (adr.index_needs_rex()) {
6738         prefix(REX_WRXB);
6739       } else {
6740         prefix(REX_WRB);
6741       }
6742     } else {
6743       if (adr.index_needs_rex()) {
6744         prefix(REX_WRX);
6745       } else {
6746         prefix(REX_WR);
6747       }
6748     }
6749   }
6750 }
6751 
6752 void Assembler::prefix(Address adr, XMMRegister reg) {
6753   if (reg->encoding() < 8) {
6754     if (adr.base_needs_rex()) {
6755       if (adr.index_needs_rex()) {
6756         prefix(REX_XB);
6757       } else {
6758         prefix(REX_B);
6759       }
6760     } else {
6761       if (adr.index_needs_rex()) {
6762         prefix(REX_X);
6763       }
6764     }
6765   } else {
6766     if (adr.base_needs_rex()) {
6767       if (adr.index_needs_rex()) {
6768         prefix(REX_RXB);
6769       } else {
6770         prefix(REX_RB);
6771       }
6772     } else {
6773       if (adr.index_needs_rex()) {
6774         prefix(REX_RX);
6775       } else {
6776         prefix(REX_R);
6777       }
6778     }
6779   }
6780 }
6781 
6782 void Assembler::prefixq(Address adr, XMMRegister src) {
6783   if (src->encoding() < 8) {
6784     if (adr.base_needs_rex()) {
6785       if (adr.index_needs_rex()) {
6786         prefix(REX_WXB);
6787       } else {
6788         prefix(REX_WB);
6789       }
6790     } else {
6791       if (adr.index_needs_rex()) {
6792         prefix(REX_WX);
6793       } else {
6794         prefix(REX_W);
6795       }
6796     }
6797   } else {
6798     if (adr.base_needs_rex()) {
6799       if (adr.index_needs_rex()) {
6800         prefix(REX_WRXB);
6801       } else {
6802         prefix(REX_WRB);
6803       }
6804     } else {
6805       if (adr.index_needs_rex()) {
6806         prefix(REX_WRX);
6807       } else {
6808         prefix(REX_WR);
6809       }
6810     }
6811   }
6812 }
6813 
6814 void Assembler::adcq(Register dst, int32_t imm32) {
6815   (void) prefixq_and_encode(dst->encoding());
6816   emit_arith(0x81, 0xD0, dst, imm32);
6817 }
6818 
6819 void Assembler::adcq(Register dst, Address src) {
6820   InstructionMark im(this);
6821   prefixq(src, dst);
6822   emit_int8(0x13);
6823   emit_operand(dst, src);
6824 }
6825 
6826 void Assembler::adcq(Register dst, Register src) {
6827   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6828   emit_arith(0x13, 0xC0, dst, src);
6829 }
6830 
6831 void Assembler::addq(Address dst, int32_t imm32) {
6832   InstructionMark im(this);
6833   prefixq(dst);
6834   emit_arith_operand(0x81, rax, dst,imm32);
6835 }
6836 
6837 void Assembler::addq(Address dst, Register src) {
6838   InstructionMark im(this);
6839   prefixq(dst, src);
6840   emit_int8(0x01);
6841   emit_operand(src, dst);
6842 }
6843 
6844 void Assembler::addq(Register dst, int32_t imm32) {
6845   (void) prefixq_and_encode(dst->encoding());
6846   emit_arith(0x81, 0xC0, dst, imm32);
6847 }
6848 
6849 void Assembler::addq(Register dst, Address src) {
6850   InstructionMark im(this);
6851   prefixq(src, dst);
6852   emit_int8(0x03);
6853   emit_operand(dst, src);
6854 }
6855 
6856 void Assembler::addq(Register dst, Register src) {
6857   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6858   emit_arith(0x03, 0xC0, dst, src);
6859 }
6860 
6861 void Assembler::adcxq(Register dst, Register src) {
6862   //assert(VM_Version::supports_adx(), "adx instructions not supported");
6863   emit_int8((unsigned char)0x66);
6864   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6865   emit_int8(0x0F);
6866   emit_int8(0x38);
6867   emit_int8((unsigned char)0xF6);
6868   emit_int8((unsigned char)(0xC0 | encode));
6869 }
6870 
6871 void Assembler::adoxq(Register dst, Register src) {
6872   //assert(VM_Version::supports_adx(), "adx instructions not supported");
6873   emit_int8((unsigned char)0xF3);
6874   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6875   emit_int8(0x0F);
6876   emit_int8(0x38);
6877   emit_int8((unsigned char)0xF6);
6878   emit_int8((unsigned char)(0xC0 | encode));
6879 }
6880 
6881 void Assembler::andq(Address dst, int32_t imm32) {
6882   InstructionMark im(this);
6883   prefixq(dst);
6884   emit_int8((unsigned char)0x81);
6885   emit_operand(rsp, dst, 4);
6886   emit_int32(imm32);
6887 }
6888 
6889 void Assembler::andq(Register dst, int32_t imm32) {
6890   (void) prefixq_and_encode(dst->encoding());
6891   emit_arith(0x81, 0xE0, dst, imm32);
6892 }
6893 
6894 void Assembler::andq(Register dst, Address src) {
6895   InstructionMark im(this);
6896   prefixq(src, dst);
6897   emit_int8(0x23);
6898   emit_operand(dst, src);
6899 }
6900 
6901 void Assembler::andq(Register dst, Register src) {
6902   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6903   emit_arith(0x23, 0xC0, dst, src);
6904 }
6905 
6906 void Assembler::andnq(Register dst, Register src1, Register src2) {
6907   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6908   int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2);
6909   emit_int8((unsigned char)0xF2);
6910   emit_int8((unsigned char)(0xC0 | encode));
6911 }
6912 
6913 void Assembler::andnq(Register dst, Register src1, Address src2) {
6914   InstructionMark im(this);
6915   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6916   vex_prefix_0F38_q_legacy(dst, src1, src2);
6917   emit_int8((unsigned char)0xF2);
6918   emit_operand(dst, src2);
6919 }
6920 
6921 void Assembler::bsfq(Register dst, Register src) {
6922   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6923   emit_int8(0x0F);
6924   emit_int8((unsigned char)0xBC);
6925   emit_int8((unsigned char)(0xC0 | encode));
6926 }
6927 
6928 void Assembler::bsrq(Register dst, Register src) {
6929   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6930   emit_int8(0x0F);
6931   emit_int8((unsigned char)0xBD);
6932   emit_int8((unsigned char)(0xC0 | encode));
6933 }
6934 
6935 void Assembler::bswapq(Register reg) {
6936   int encode = prefixq_and_encode(reg->encoding());
6937   emit_int8(0x0F);
6938   emit_int8((unsigned char)(0xC8 | encode));
6939 }
6940 
6941 void Assembler::blsiq(Register dst, Register src) {
6942   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6943   int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src);
6944   emit_int8((unsigned char)0xF3);
6945   emit_int8((unsigned char)(0xC0 | encode));
6946 }
6947 
6948 void Assembler::blsiq(Register dst, Address src) {
6949   InstructionMark im(this);
6950   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6951   vex_prefix_0F38_q_legacy(rbx, dst, src);
6952   emit_int8((unsigned char)0xF3);
6953   emit_operand(rbx, src);
6954 }
6955 
6956 void Assembler::blsmskq(Register dst, Register src) {
6957   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6958   int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src);
6959   emit_int8((unsigned char)0xF3);
6960   emit_int8((unsigned char)(0xC0 | encode));
6961 }
6962 
6963 void Assembler::blsmskq(Register dst, Address src) {
6964   InstructionMark im(this);
6965   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6966   vex_prefix_0F38_q_legacy(rdx, dst, src);
6967   emit_int8((unsigned char)0xF3);
6968   emit_operand(rdx, src);
6969 }
6970 
6971 void Assembler::blsrq(Register dst, Register src) {
6972   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6973   int encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src);
6974   emit_int8((unsigned char)0xF3);
6975   emit_int8((unsigned char)(0xC0 | encode));
6976 }
6977 
6978 void Assembler::blsrq(Register dst, Address src) {
6979   InstructionMark im(this);
6980   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6981   vex_prefix_0F38_q_legacy(rcx, dst, src);
6982   emit_int8((unsigned char)0xF3);
6983   emit_operand(rcx, src);
6984 }
6985 
6986 void Assembler::cdqq() {
6987   prefix(REX_W);
6988   emit_int8((unsigned char)0x99);
6989 }
6990 
6991 void Assembler::clflush(Address adr) {
6992   prefix(adr);
6993   emit_int8(0x0F);
6994   emit_int8((unsigned char)0xAE);
6995   emit_operand(rdi, adr);
6996 }
6997 
6998 void Assembler::cmovq(Condition cc, Register dst, Register src) {
6999   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7000   emit_int8(0x0F);
7001   emit_int8(0x40 | cc);
7002   emit_int8((unsigned char)(0xC0 | encode));
7003 }
7004 
7005 void Assembler::cmovq(Condition cc, Register dst, Address src) {
7006   InstructionMark im(this);
7007   prefixq(src, dst);
7008   emit_int8(0x0F);
7009   emit_int8(0x40 | cc);
7010   emit_operand(dst, src);
7011 }
7012 
7013 void Assembler::cmpq(Address dst, int32_t imm32) {
7014   InstructionMark im(this);
7015   prefixq(dst);
7016   emit_int8((unsigned char)0x81);
7017   emit_operand(rdi, dst, 4);
7018   emit_int32(imm32);
7019 }
7020 
7021 void Assembler::cmpq(Register dst, int32_t imm32) {
7022   (void) prefixq_and_encode(dst->encoding());
7023   emit_arith(0x81, 0xF8, dst, imm32);
7024 }
7025 
void Assembler::cmpq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  // 0x39 is CMP r/m64, r64: the memory operand is the left-hand side of the
  // comparison (0x3B would compute src - [dst], reversing the condition sense)
  emit_int8(0x39);
  emit_operand(src, dst);
}
7032 
7033 void Assembler::cmpq(Register dst, Register src) {
7034   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7035   emit_arith(0x3B, 0xC0, dst, src);
7036 }
7037 
7038 void Assembler::cmpq(Register dst, Address  src) {
7039   InstructionMark im(this);
7040   prefixq(src, dst);
7041   emit_int8(0x3B);
7042   emit_operand(dst, src);
7043 }
7044 
7045 void Assembler::cmpxchgq(Register reg, Address adr) {
7046   InstructionMark im(this);
7047   prefixq(adr, reg);
7048   emit_int8(0x0F);
7049   emit_int8((unsigned char)0xB1);
7050   emit_operand(reg, adr);
7051 }
7052 
7053 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
7054   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7055   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
7056   emit_int8(0x2A);
7057   emit_int8((unsigned char)(0xC0 | encode));
7058 }
7059 
7060 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
7061   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7062   if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    // the memory operand is a qword integer, so the disp8 compression
    // input size is 64-bit
    _input_size_in_bits = EVEX_64bit;
7065   }
7066   InstructionMark im(this);
7067   simd_prefix_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
7068   emit_int8(0x2A);
7069   emit_operand(dst, src);
7070 }
7071 
7072 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
7073   NOT_LP64(assert(VM_Version::supports_sse(), ""));
7074   if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    // the memory operand is a qword integer, so the disp8 compression
    // input size is 64-bit
    _input_size_in_bits = EVEX_64bit;
7077   }
7078   InstructionMark im(this);
7079   simd_prefix_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
7080   emit_int8(0x2A);
7081   emit_operand(dst, src);
7082 }
7083 
7084 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
7085   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7086   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
7087   emit_int8(0x2C);
7088   emit_int8((unsigned char)(0xC0 | encode));
7089 }
7090 
7091 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
7092   NOT_LP64(assert(VM_Version::supports_sse(), ""));
7093   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
7094   emit_int8(0x2C);
7095   emit_int8((unsigned char)(0xC0 | encode));
7096 }
7097 
7098 void Assembler::decl(Register dst) {
7099   // Don't use it directly. Use MacroAssembler::decrementl() instead.
7100   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
7101   int encode = prefix_and_encode(dst->encoding());
7102   emit_int8((unsigned char)0xFF);
7103   emit_int8((unsigned char)(0xC8 | encode));
7104 }
7105 
void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
}
7113 
7114 void Assembler::decq(Address dst) {
7115   // Don't use it directly. Use MacroAssembler::decrementq() instead.
7116   InstructionMark im(this);
7117   prefixq(dst);
7118   emit_int8((unsigned char)0xFF);
7119   emit_operand(rcx, dst);
7120 }
7121 
7122 void Assembler::fxrstor(Address src) {
7123   prefixq(src);
7124   emit_int8(0x0F);
7125   emit_int8((unsigned char)0xAE);
7126   emit_operand(as_Register(1), src);
7127 }
7128 
7129 void Assembler::xrstor(Address src) {
7130   prefixq(src);
7131   emit_int8(0x0F);
7132   emit_int8((unsigned char)0xAE);
7133   emit_operand(as_Register(5), src);
7134 }
7135 
7136 void Assembler::fxsave(Address dst) {
7137   prefixq(dst);
7138   emit_int8(0x0F);
7139   emit_int8((unsigned char)0xAE);
7140   emit_operand(as_Register(0), dst);
7141 }
7142 
7143 void Assembler::xsave(Address dst) {
7144   prefixq(dst);
7145   emit_int8(0x0F);
7146   emit_int8((unsigned char)0xAE);
7147   emit_operand(as_Register(4), dst);
7148 }
7149 
7150 void Assembler::idivq(Register src) {
7151   int encode = prefixq_and_encode(src->encoding());
7152   emit_int8((unsigned char)0xF7);
7153   emit_int8((unsigned char)(0xF8 | encode));
7154 }
7155 
7156 void Assembler::imulq(Register dst, Register src) {
7157   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7158   emit_int8(0x0F);
7159   emit_int8((unsigned char)0xAF);
7160   emit_int8((unsigned char)(0xC0 | encode));
7161 }
7162 
7163 void Assembler::imulq(Register dst, Register src, int value) {
7164   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7165   if (is8bit(value)) {
7166     emit_int8(0x6B);
7167     emit_int8((unsigned char)(0xC0 | encode));
7168     emit_int8(value & 0xFF);
7169   } else {
7170     emit_int8(0x69);
7171     emit_int8((unsigned char)(0xC0 | encode));
7172     emit_int32(value);
7173   }
7174 }
7175 
7176 void Assembler::imulq(Register dst, Address src) {
7177   InstructionMark im(this);
7178   prefixq(src, dst);
7179   emit_int8(0x0F);
7180   emit_int8((unsigned char) 0xAF);
7181   emit_operand(dst, src);
7182 }
7183 
7184 void Assembler::incl(Register dst) {
7185   // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
7187   int encode = prefix_and_encode(dst->encoding());
7188   emit_int8((unsigned char)0xFF);
7189   emit_int8((unsigned char)(0xC0 | encode));
7190 }
7191 
7192 void Assembler::incq(Register dst) {
7193   // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
7195   int encode = prefixq_and_encode(dst->encoding());
7196   emit_int8((unsigned char)0xFF);
7197   emit_int8((unsigned char)(0xC0 | encode));
7198 }
7199 
7200 void Assembler::incq(Address dst) {
7201   // Don't use it directly. Use MacroAssembler::incrementq() instead.
7202   InstructionMark im(this);
7203   prefixq(dst);
7204   emit_int8((unsigned char)0xFF);
7205   emit_operand(rax, dst);
7206 }
7207 
7208 void Assembler::lea(Register dst, Address src) {
7209   leaq(dst, src);
7210 }
7211 
7212 void Assembler::leaq(Register dst, Address src) {
7213   InstructionMark im(this);
7214   prefixq(src, dst);
7215   emit_int8((unsigned char)0x8D);
7216   emit_operand(dst, src);
7217 }
7218 
7219 void Assembler::mov64(Register dst, int64_t imm64) {
7220   InstructionMark im(this);
7221   int encode = prefixq_and_encode(dst->encoding());
7222   emit_int8((unsigned char)(0xB8 | encode));
7223   emit_int64(imm64);
7224 }
7225 
7226 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
7227   InstructionMark im(this);
7228   int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
7230   emit_data64(imm64, rspec);
7231 }
7232 
7233 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
7234   InstructionMark im(this);
7235   int encode = prefix_and_encode(dst->encoding());
7236   emit_int8((unsigned char)(0xB8 | encode));
7237   emit_data((int)imm32, rspec, narrow_oop_operand);
7238 }
7239 
7240 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
7241   InstructionMark im(this);
7242   prefix(dst);
7243   emit_int8((unsigned char)0xC7);
7244   emit_operand(rax, dst, 4);
7245   emit_data((int)imm32, rspec, narrow_oop_operand);
7246 }
7247 
7248 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
7249   InstructionMark im(this);
7250   int encode = prefix_and_encode(src1->encoding());
7251   emit_int8((unsigned char)0x81);
7252   emit_int8((unsigned char)(0xF8 | encode));
7253   emit_data((int)imm32, rspec, narrow_oop_operand);
7254 }
7255 
7256 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
7257   InstructionMark im(this);
7258   prefix(src1);
7259   emit_int8((unsigned char)0x81);
7260   emit_operand(rax, src1, 4);
7261   emit_data((int)imm32, rspec, narrow_oop_operand);
7262 }
7263 
7264 void Assembler::lzcntq(Register dst, Register src) {
7265   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
7266   emit_int8((unsigned char)0xF3);
7267   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7268   emit_int8(0x0F);
7269   emit_int8((unsigned char)0xBD);
7270   emit_int8((unsigned char)(0xC0 | encode));
7271 }
7272 
7273 void Assembler::movdq(XMMRegister dst, Register src) {
7274   // table D-1 says MMX/SSE2
7275   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7276   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
7277   emit_int8(0x6E);
7278   emit_int8((unsigned char)(0xC0 | encode));
7279 }
7280 
7281 void Assembler::movdq(Register dst, XMMRegister src) {
7282   // table D-1 says MMX/SSE2
7283   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
7284   // swap src/dst to get correct prefix
7285   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
7286   emit_int8(0x7E);
7287   emit_int8((unsigned char)(0xC0 | encode));
7288 }
7289 
7290 void Assembler::movq(Register dst, Register src) {
7291   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7292   emit_int8((unsigned char)0x8B);
7293   emit_int8((unsigned char)(0xC0 | encode));
7294 }
7295 
7296 void Assembler::movq(Register dst, Address src) {
7297   InstructionMark im(this);
7298   prefixq(src, dst);
7299   emit_int8((unsigned char)0x8B);
7300   emit_operand(dst, src);
7301 }
7302 
7303 void Assembler::movq(Address dst, Register src) {
7304   InstructionMark im(this);
7305   prefixq(dst, src);
7306   emit_int8((unsigned char)0x89);
7307   emit_operand(src, dst);
7308 }
7309 
7310 void Assembler::movsbq(Register dst, Address src) {
7311   InstructionMark im(this);
7312   prefixq(src, dst);
7313   emit_int8(0x0F);
7314   emit_int8((unsigned char)0xBE);
7315   emit_operand(dst, src);
7316 }
7317 
7318 void Assembler::movsbq(Register dst, Register src) {
7319   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7320   emit_int8(0x0F);
7321   emit_int8((unsigned char)0xBE);
7322   emit_int8((unsigned char)(0xC0 | encode));
7323 }
7324 
7325 void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use this form until it has been tested at runtime...
7329   ShouldNotReachHere();
7330   InstructionMark im(this);
7331   int encode = prefixq_and_encode(dst->encoding());
7332   emit_int8((unsigned char)(0xC7 | encode));
7333   emit_int32(imm32);
7334 }
7335 
7336 void Assembler::movslq(Address dst, int32_t imm32) {
7337   assert(is_simm32(imm32), "lost bits");
7338   InstructionMark im(this);
7339   prefixq(dst);
7340   emit_int8((unsigned char)0xC7);
7341   emit_operand(rax, dst, 4);
7342   emit_int32(imm32);
7343 }
7344 
7345 void Assembler::movslq(Register dst, Address src) {
7346   InstructionMark im(this);
7347   prefixq(src, dst);
7348   emit_int8(0x63);
7349   emit_operand(dst, src);
7350 }
7351 
7352 void Assembler::movslq(Register dst, Register src) {
7353   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7354   emit_int8(0x63);
7355   emit_int8((unsigned char)(0xC0 | encode));
7356 }
7357 
7358 void Assembler::movswq(Register dst, Address src) {
7359   InstructionMark im(this);
7360   prefixq(src, dst);
7361   emit_int8(0x0F);
7362   emit_int8((unsigned char)0xBF);
7363   emit_operand(dst, src);
7364 }
7365 
7366 void Assembler::movswq(Register dst, Register src) {
7367   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7368   emit_int8((unsigned char)0x0F);
7369   emit_int8((unsigned char)0xBF);
7370   emit_int8((unsigned char)(0xC0 | encode));
7371 }
7372 
7373 void Assembler::movzbq(Register dst, Address src) {
7374   InstructionMark im(this);
7375   prefixq(src, dst);
7376   emit_int8((unsigned char)0x0F);
7377   emit_int8((unsigned char)0xB6);
7378   emit_operand(dst, src);
7379 }
7380 
7381 void Assembler::movzbq(Register dst, Register src) {
7382   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7383   emit_int8(0x0F);
7384   emit_int8((unsigned char)0xB6);
  emit_int8((unsigned char)(0xC0 | encode));
7386 }
7387 
7388 void Assembler::movzwq(Register dst, Address src) {
7389   InstructionMark im(this);
7390   prefixq(src, dst);
7391   emit_int8((unsigned char)0x0F);
7392   emit_int8((unsigned char)0xB7);
7393   emit_operand(dst, src);
7394 }
7395 
7396 void Assembler::movzwq(Register dst, Register src) {
7397   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7398   emit_int8((unsigned char)0x0F);
7399   emit_int8((unsigned char)0xB7);
7400   emit_int8((unsigned char)(0xC0 | encode));
7401 }
7402 
7403 void Assembler::mulq(Address src) {
7404   InstructionMark im(this);
7405   prefixq(src);
7406   emit_int8((unsigned char)0xF7);
7407   emit_operand(rsp, src);
7408 }
7409 
7410 void Assembler::mulq(Register src) {
7411   int encode = prefixq_and_encode(src->encoding());
7412   emit_int8((unsigned char)0xF7);
7413   emit_int8((unsigned char)(0xE0 | encode));
7414 }
7415 
7416 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
7417   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
7418   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38,
7419                                     /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
7420   emit_int8((unsigned char)0xF6);
7421   emit_int8((unsigned char)(0xC0 | encode));
7422 }
7423 
7424 void Assembler::negq(Register dst) {
7425   int encode = prefixq_and_encode(dst->encoding());
7426   emit_int8((unsigned char)0xF7);
7427   emit_int8((unsigned char)(0xD8 | encode));
7428 }
7429 
7430 void Assembler::notq(Register dst) {
7431   int encode = prefixq_and_encode(dst->encoding());
7432   emit_int8((unsigned char)0xF7);
7433   emit_int8((unsigned char)(0xD0 | encode));
7434 }
7435 
7436 void Assembler::orq(Address dst, int32_t imm32) {
7437   InstructionMark im(this);
7438   prefixq(dst);
7439   emit_int8((unsigned char)0x81);
7440   emit_operand(rcx, dst, 4);
7441   emit_int32(imm32);
7442 }
7443 
7444 void Assembler::orq(Register dst, int32_t imm32) {
7445   (void) prefixq_and_encode(dst->encoding());
7446   emit_arith(0x81, 0xC8, dst, imm32);
7447 }
7448 
7449 void Assembler::orq(Register dst, Address src) {
7450   InstructionMark im(this);
7451   prefixq(src, dst);
7452   emit_int8(0x0B);
7453   emit_operand(dst, src);
7454 }
7455 
7456 void Assembler::orq(Register dst, Register src) {
7457   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7458   emit_arith(0x0B, 0xC0, dst, src);
7459 }
7460 
7461 void Assembler::popa() { // 64bit
7462   movq(r15, Address(rsp, 0));
7463   movq(r14, Address(rsp, wordSize));
7464   movq(r13, Address(rsp, 2 * wordSize));
7465   movq(r12, Address(rsp, 3 * wordSize));
7466   movq(r11, Address(rsp, 4 * wordSize));
7467   movq(r10, Address(rsp, 5 * wordSize));
7468   movq(r9,  Address(rsp, 6 * wordSize));
7469   movq(r8,  Address(rsp, 7 * wordSize));
7470   movq(rdi, Address(rsp, 8 * wordSize));
7471   movq(rsi, Address(rsp, 9 * wordSize));
7472   movq(rbp, Address(rsp, 10 * wordSize));
7473   // skip rsp
7474   movq(rbx, Address(rsp, 12 * wordSize));
7475   movq(rdx, Address(rsp, 13 * wordSize));
7476   movq(rcx, Address(rsp, 14 * wordSize));
7477   movq(rax, Address(rsp, 15 * wordSize));
7478 
7479   addq(rsp, 16 * wordSize);
7480 }
7481 
7482 void Assembler::popcntq(Register dst, Address src) {
7483   assert(VM_Version::supports_popcnt(), "must support");
7484   InstructionMark im(this);
7485   emit_int8((unsigned char)0xF3);
7486   prefixq(src, dst);
7487   emit_int8((unsigned char)0x0F);
7488   emit_int8((unsigned char)0xB8);
7489   emit_operand(dst, src);
7490 }
7491 
7492 void Assembler::popcntq(Register dst, Register src) {
7493   assert(VM_Version::supports_popcnt(), "must support");
7494   emit_int8((unsigned char)0xF3);
7495   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7496   emit_int8((unsigned char)0x0F);
7497   emit_int8((unsigned char)0xB8);
7498   emit_int8((unsigned char)(0xC0 | encode));
7499 }
7500 
7501 void Assembler::popq(Address dst) {
7502   InstructionMark im(this);
7503   prefixq(dst);
7504   emit_int8((unsigned char)0x8F);
7505   emit_operand(rax, dst);
7506 }
7507 
7508 void Assembler::pusha() { // 64bit
7509   // we have to store original rsp.  ABI says that 128 bytes
7510   // below rsp are local scratch.
7511   movq(Address(rsp, -5 * wordSize), rsp);
7512 
7513   subq(rsp, 16 * wordSize);
7514 
7515   movq(Address(rsp, 15 * wordSize), rax);
7516   movq(Address(rsp, 14 * wordSize), rcx);
7517   movq(Address(rsp, 13 * wordSize), rdx);
7518   movq(Address(rsp, 12 * wordSize), rbx);
7519   // skip rsp
7520   movq(Address(rsp, 10 * wordSize), rbp);
7521   movq(Address(rsp, 9 * wordSize), rsi);
7522   movq(Address(rsp, 8 * wordSize), rdi);
7523   movq(Address(rsp, 7 * wordSize), r8);
7524   movq(Address(rsp, 6 * wordSize), r9);
7525   movq(Address(rsp, 5 * wordSize), r10);
7526   movq(Address(rsp, 4 * wordSize), r11);
7527   movq(Address(rsp, 3 * wordSize), r12);
7528   movq(Address(rsp, 2 * wordSize), r13);
7529   movq(Address(rsp, wordSize), r14);
7530   movq(Address(rsp, 0), r15);
7531 }
7532 
7533 void Assembler::pushq(Address src) {
7534   InstructionMark im(this);
7535   prefixq(src);
7536   emit_int8((unsigned char)0xFF);
7537   emit_operand(rsi, src);
7538 }
7539 
void Assembler::rclq(Register dst, int imm8) {
  // 64-bit shift counts range over 0..63, while isShiftCount() checks 0..31,
  // so the count is halved before the check
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
7542   int encode = prefixq_and_encode(dst->encoding());
7543   if (imm8 == 1) {
7544     emit_int8((unsigned char)0xD1);
7545     emit_int8((unsigned char)(0xD0 | encode));
7546   } else {
7547     emit_int8((unsigned char)0xC1);
7548     emit_int8((unsigned char)(0xD0 | encode));
7549     emit_int8(imm8);
7550   }
7551 }
7552 
7553 void Assembler::rcrq(Register dst, int imm8) {
7554   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7555   int encode = prefixq_and_encode(dst->encoding());
7556   if (imm8 == 1) {
7557     emit_int8((unsigned char)0xD1);
7558     emit_int8((unsigned char)(0xD8 | encode));
7559   } else {
7560     emit_int8((unsigned char)0xC1);
7561     emit_int8((unsigned char)(0xD8 | encode));
7562     emit_int8(imm8);
7563   }
7564 }
7565 
7566 void Assembler::rorq(Register dst, int imm8) {
7567   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7568   int encode = prefixq_and_encode(dst->encoding());
7569   if (imm8 == 1) {
7570     emit_int8((unsigned char)0xD1);
7571     emit_int8((unsigned char)(0xC8 | encode));
7572   } else {
7573     emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xC8 | encode));
7575     emit_int8(imm8);
7576   }
7577 }
7578 
7579 void Assembler::rorxq(Register dst, Register src, int imm8) {
7580   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
7581   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A,
7582                                      /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
7583   emit_int8((unsigned char)0xF0);
7584   emit_int8((unsigned char)(0xC0 | encode));
7585   emit_int8(imm8);
7586 }
7587 
7588 void Assembler::sarq(Register dst, int imm8) {
7589   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7590   int encode = prefixq_and_encode(dst->encoding());
7591   if (imm8 == 1) {
7592     emit_int8((unsigned char)0xD1);
7593     emit_int8((unsigned char)(0xF8 | encode));
7594   } else {
7595     emit_int8((unsigned char)0xC1);
7596     emit_int8((unsigned char)(0xF8 | encode));
7597     emit_int8(imm8);
7598   }
7599 }
7600 
7601 void Assembler::sarq(Register dst) {
7602   int encode = prefixq_and_encode(dst->encoding());
7603   emit_int8((unsigned char)0xD3);
7604   emit_int8((unsigned char)(0xF8 | encode));
7605 }
7606 
7607 void Assembler::sbbq(Address dst, int32_t imm32) {
7608   InstructionMark im(this);
7609   prefixq(dst);
7610   emit_arith_operand(0x81, rbx, dst, imm32);
7611 }
7612 
7613 void Assembler::sbbq(Register dst, int32_t imm32) {
7614   (void) prefixq_and_encode(dst->encoding());
7615   emit_arith(0x81, 0xD8, dst, imm32);
7616 }
7617 
7618 void Assembler::sbbq(Register dst, Address src) {
7619   InstructionMark im(this);
7620   prefixq(src, dst);
7621   emit_int8(0x1B);
7622   emit_operand(dst, src);
7623 }
7624 
7625 void Assembler::sbbq(Register dst, Register src) {
7626   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7627   emit_arith(0x1B, 0xC0, dst, src);
7628 }
7629 
7630 void Assembler::shlq(Register dst, int imm8) {
7631   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7632   int encode = prefixq_and_encode(dst->encoding());
7633   if (imm8 == 1) {
7634     emit_int8((unsigned char)0xD1);
7635     emit_int8((unsigned char)(0xE0 | encode));
7636   } else {
7637     emit_int8((unsigned char)0xC1);
7638     emit_int8((unsigned char)(0xE0 | encode));
7639     emit_int8(imm8);
7640   }
7641 }
7642 
7643 void Assembler::shlq(Register dst) {
7644   int encode = prefixq_and_encode(dst->encoding());
7645   emit_int8((unsigned char)0xD3);
7646   emit_int8((unsigned char)(0xE0 | encode));
7647 }
7648 
7649 void Assembler::shrq(Register dst, int imm8) {
7650   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7651   int encode = prefixq_and_encode(dst->encoding());
7652   emit_int8((unsigned char)0xC1);
7653   emit_int8((unsigned char)(0xE8 | encode));
7654   emit_int8(imm8);
7655 }
7656 
7657 void Assembler::shrq(Register dst) {
7658   int encode = prefixq_and_encode(dst->encoding());
7659   emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
7661 }
7662 
7663 void Assembler::subq(Address dst, int32_t imm32) {
7664   InstructionMark im(this);
7665   prefixq(dst);
7666   emit_arith_operand(0x81, rbp, dst, imm32);
7667 }
7668 
7669 void Assembler::subq(Address dst, Register src) {
7670   InstructionMark im(this);
7671   prefixq(dst, src);
7672   emit_int8(0x29);
7673   emit_operand(src, dst);
7674 }
7675 
7676 void Assembler::subq(Register dst, int32_t imm32) {
7677   (void) prefixq_and_encode(dst->encoding());
7678   emit_arith(0x81, 0xE8, dst, imm32);
7679 }
7680 
7681 // Force generation of a 4 byte immediate value even if it fits into 8bit
7682 void Assembler::subq_imm32(Register dst, int32_t imm32) {
7683   (void) prefixq_and_encode(dst->encoding());
7684   emit_arith_imm32(0x81, 0xE8, dst, imm32);
7685 }
7686 
7687 void Assembler::subq(Register dst, Address src) {
7688   InstructionMark im(this);
7689   prefixq(src, dst);
7690   emit_int8(0x2B);
7691   emit_operand(dst, src);
7692 }
7693 
7694 void Assembler::subq(Register dst, Register src) {
7695   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7696   emit_arith(0x2B, 0xC0, dst, src);
7697 }
7698 
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    // rax has the short-form encoding REX.W A9 id
    prefix(REX_W);
    emit_int8((unsigned char)0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode));
  }
7712   emit_int32(imm32);
7713 }
7714 
7715 void Assembler::testq(Register dst, Register src) {
7716   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7717   emit_arith(0x85, 0xC0, dst, src);
7718 }
7719 
7720 void Assembler::xaddq(Address dst, Register src) {
7721   InstructionMark im(this);
7722   prefixq(dst, src);
7723   emit_int8(0x0F);
7724   emit_int8((unsigned char)0xC1);
7725   emit_operand(src, dst);
7726 }
7727 
7728 void Assembler::xchgq(Register dst, Address src) {
7729   InstructionMark im(this);
7730   prefixq(src, dst);
7731   emit_int8((unsigned char)0x87);
7732   emit_operand(dst, src);
7733 }
7734 
7735 void Assembler::xchgq(Register dst, Register src) {
7736   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7737   emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
7739 }
7740 
7741 void Assembler::xorq(Register dst, Register src) {
7742   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7743   emit_arith(0x33, 0xC0, dst, src);
7744 }
7745 
7746 void Assembler::xorq(Register dst, Address src) {
7747   InstructionMark im(this);
7748   prefixq(src, dst);
7749   emit_int8(0x33);
7750   emit_operand(dst, src);
7751 }
7752 
7753 #endif // !LP64