/*
 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

// A 2-D table for managing compressed displacement (disp8) on EVEX enabled platforms.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
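
// Worked example of the disp8*N compression this table drives: for tuple
// EVEX_T1S with a 64-bit input the factor is 8 at every vector length, so a
// displacement of 64 compresses to the 8-bit value 64/8 = 8, while a
// displacement that is not a multiple of 8 must fall back to the disp32 form.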

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}
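
// For example, a raw encoding with base = 0 (rax), index = 4 (rsp's number),
// scale = 0 and disp = 16 decodes as [rax + 16] with no index register at all,
// since the SIB convention reserves index 100b to mean "no index".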

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}
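
// Note on the helper above: for the REX-extended registers (r8-r15) the high
// bit of the register number travels in the REX/VEX prefix, so only the low
// three bits are placed into the ModRM/SIB fields assembled below.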

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_int32(imm32);
  }
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int8(op1);
  emit_int8(op2 | encode(dst) << 3 | encode(src));
}
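
// Illustrative encoding (a sketch): emit_arith(0x03, 0xC0, rdx, rcx) emits
// the bytes 0x03 0xD1 (ModRM 11 010 001), i.e. "addl rdx, rcx" with rdx in
// the reg field and rcx in the r/m field.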


bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // Test whether the displacement fits the compressed format, and if so
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && is_evex_inst) {
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        // Parenthesized so that '2 +' selects the table row; without the
        // parentheses the ternary would consume the whole '2 + ...' sum.
        mod_idx = 2 + (((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0);
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (-0x80 <= new_disp && new_disp < 0x80) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return (-0x80 <= disp && disp < 0x80);
}

bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // Test whether the displacement fits the compressed format, and if so
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && is_evex_instruction) {
    switch (tuple_type) {
    case EVEX_FV:
      if ((evex_encoding & VEX_W) == VEX_W) {
        // Parenthesized so that '2 +' selects the table row; without the
        // parentheses the ternary would consume the whole '2 + ...' sum.
        mod_idx = 2 + (((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0);
      } else {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (input_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (input_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (avx_vector_len >= AVX_128bit && avx_vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[tuple_type + mod_idx][avx_vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return is8bit(disp);
}


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32-bit never did this; it handled everything via the rip-rel/disp
      // code above.
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
  is_evex_instruction = false;
}
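
// By way of example, [rbx + rcx*4 + 8] with reg = rax takes the disp8 path
// above when no EVEX compression is active: 0x44 (ModRM: mod = 01, reg = rax,
// r/m = 100), 0x8B (SIB: scale = 10, index = rcx, base = rbx), then 0x08.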

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  if (UseAVX > 2) {
    int xreg_enc = reg->encoding();
    if (xreg_enc > 15) {
      XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
      emit_operand((Register)new_reg, base, index, scale, disp, rspec);
      return;
    }
  }
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.
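  // For instance, for "call rel32" (opcode 0xE8), call32_operand yields the
  // address of the 4-byte displacement just past the opcode, and
  // end_pc_operand yields that address plus 4.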

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushq has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions,
    // but those have the 0x0F prefix and are handled when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert((UseAVX > 0), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions, all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i && i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
  }
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    tuple_type = EVEX_T1S;
    input_size_in_bits = EVEX_64bit;
    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
  }
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  if (VM_Version::supports_evex()) {
    tuple_type = EVEX_T1S;
    input_size_in_bits = EVEX_32bit;
  }
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, false,
              VEX_OPCODE_0F_38, false, AVX_128bit, true);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
  emit_int8((unsigned char)0xDE);
  emit_int8(0xC0 | encode);
}

void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, false,
              VEX_OPCODE_0F_38, false, AVX_128bit, true);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}

void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, false,
              VEX_OPCODE_0F_38, false, AVX_128bit, true);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}

void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
  emit_int8((unsigned char)0xDC);
  emit_int8(0xC0 | encode);
}

void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, false,
              VEX_OPCODE_0F_38, false, AVX_128bit, true);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}

void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rsp, dst, 4);
  emit_int32(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2, false);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andnl(Register dst, Register src1, Address src2) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(dst, src1, src2, false);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}

void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src, false);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsil(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rbx, dst, src, false);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}

void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src, false);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsmskl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38(rdx, dst, src, false);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}

void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src, false);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsrl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rcx, dst, src, false);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_int8((unsigned char)0x99);
}

void Assembler::cld() {
  emit_int8((unsigned char)0xFC);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rdi, dst, 1);
  emit_int8(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_int32(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_int8(0x66);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax;
// if they are equal, reg is stored into adr, otherwise the value at adr is
// loaded into rax. The ZF is set if the compared values were equal, and
// cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}
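
// Typical use (a sketch): callers make the exchange atomic by emitting the
// LOCK prefix first, e.g.
//   lock();
//   cmpxchgl(new_val, Address(obj, offset));
// where new_val, obj and offset are hypothetical names.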

// The 8-bit cmpxchg compares the value at adr with the low-order byte of rax;
// if they are equal, reg is stored into adr, otherwise the value at adr is
// loaded into that byte of rax. The ZF is set if the compared values were
// equal, and cleared otherwise.
void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg, true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB0);
  emit_operand(reg, adr);
}

void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely, ucomisd comes out correct.
1570   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1571   if (VM_Version::supports_evex()) {
1572     tuple_type = EVEX_T1S;
1573     input_size_in_bits = EVEX_64bit;
1574     emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true);
1575   } else {
1576     emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1577   }
1578 }
1579 
1580 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1581   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1582   if (VM_Version::supports_evex()) {
1583     emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true);
1584   } else {
1585     emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1586   }
1587 }
1588 
1589 void Assembler::comiss(XMMRegister dst, Address src) {
1590   if (VM_Version::supports_evex()) {
1591     tuple_type = EVEX_T1S;
1592     input_size_in_bits = EVEX_32bit;
1593   }
1594   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1595   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true);
1596 }
1597 
1598 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1599   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1600   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true);
1601 }
1602 
1603 void Assembler::cpuid() {
1604   emit_int8(0x0F);
1605   emit_int8((unsigned char)0xA2);
1606 }
1607 
1608 // Opcode / Instruction                   Op/En  64-Bit Mode  Compat/Leg Mode  Description                  Implemented
1609 // F2 0F 38 F0 /r        CRC32 r32, r/m8  RM     Valid        Valid            Accumulate CRC32 on r/m8.    v
1610 // F2 REX 0F 38 F0 /r    CRC32 r32, r/m8* RM     Valid        N.E.             Accumulate CRC32 on r/m8.    -
1611 // F2 REX.W 0F 38 F0 /r  CRC32 r64, r/m8  RM     Valid        N.E.             Accumulate CRC32 on r/m8.    -
1612 //
1613 // F2 0F 38 F1 /r        CRC32 r32, r/m16 RM     Valid        Valid            Accumulate CRC32 on r/m16.   v
1614 //
1615 // F2 0F 38 F1 /r        CRC32 r32, r/m32 RM     Valid        Valid            Accumulate CRC32 on r/m32.   v
1616 //
1617 // F2 REX.W 0F 38 F1 /r  CRC32 r64, r/m64 RM     Valid        N.E.             Accumulate CRC32 on r/m64.   v
1618 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1619   assert(VM_Version::supports_sse4_2(), "");
1620   int8_t w = 0x01;
1621   Prefix p = Prefix_EMPTY;
1622 
1623   emit_int8((int8_t)0xF2);
1624   switch (sizeInBytes) {
1625   case 1:
1626     w = 0;
1627     break;
1628   case 2:
1629   case 4:
1630     break;
1631   LP64_ONLY(case 8:)
1632     // This instruction is not valid in 32-bit mode.
1633     // Note:
1634     // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
1635     //
1636     // Page B-72, Vol. 2C says:
1637     // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
1638     // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r/m
1639     //                                                                            F0!!!
1640     // while page 3-208, Vol. 2A says:
1641     // F2 REX.W 0F 38 F1 /r       CRC32 r64, r/m64               RM       Valid    N.E.    Accumulate CRC32 on r/m64.
1642     //
1643     // The 0 in the last bit is reserved for a different flavor of this instruction:
1644     // F2 REX.W 0F 38 F0 /r       CRC32 r64, r/m8                RM       Valid    N.E.    Accumulate CRC32 on r/m8.
1645     p = REX_W;
1646     break;
1647   default:
1648     assert(0, "Unsupported value for a sizeInBytes argument");
1649     break;
1650   }
1651   LP64_ONLY(prefix(crc, v, p);)
1652   emit_int8((int8_t)0x0F);
1653   emit_int8(0x38);
1654   emit_int8((int8_t)(0xF0 | w));
1655   emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
1656 }
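// Worked encoding example (a sketch, following the table above): for
// crc = rax (encoding 0), v = rcx (encoding 1) and sizeInBytes = 1, w becomes
// 0 and no REX prefix is needed, so the bytes emitted are
//   F2 0F 38 F0 C1        i.e.  crc32 eax, cl
// where C1 = 0xC0 | (0 << 3) | 1 is the ModRM byte computed above.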
1657 
1658 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1659   assert(VM_Version::supports_sse4_2(), "");
1660   InstructionMark im(this);
1661   int8_t w = 0x01;
1662   Prefix p = Prefix_EMPTY;
1663 
1664   emit_int8((int8_t)0xF2);
1665   switch (sizeInBytes) {
1666   case 1:
1667     w = 0;
1668     break;
1669   case 2:
1670   case 4:
1671     break;
1672   LP64_ONLY(case 8:)
1673     // This instruction is not valid in 32-bit mode.
1674     p = REX_W;
1675     break;
1676   default:
1677     assert(0, "Unsupported value for a sizeInBytes argument");
1678     break;
1679   }
1680   LP64_ONLY(prefix(crc, adr, p);)
1681   emit_int8((int8_t)0x0F);
1682   emit_int8(0x38);
1683   emit_int8((int8_t)(0xF0 | w));
1684   emit_operand(crc, adr);
1685 }
1686 
1687 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1688   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1689   emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
1690 }
1691 
1692 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1693   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1694   emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
1695 }
1696 
1697 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1698   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1699   if (VM_Version::supports_evex()) {
1700     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1701   } else {
1702     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1703   }
1704 }
1705 
1706 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1707   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1708   if (VM_Version::supports_evex()) {
1709     tuple_type = EVEX_T1F;
1710     input_size_in_bits = EVEX_64bit;
1711     emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
1712   } else {
1713     emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1714   }
1715 }
1716 
1717 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1718   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1719   int encode = 0;
1720   if (VM_Version::supports_evex()) {
1721     encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true);
1722   } else {
1723     encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, false);
1724   }
1725   emit_int8(0x2A);
1726   emit_int8((unsigned char)(0xC0 | encode));
1727 }
1728 
1729 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1730   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1731   if (VM_Version::supports_evex()) {
1732     tuple_type = EVEX_T1S;
1733     input_size_in_bits = EVEX_32bit;
1734     emit_simd_arith_q(0x2A, dst, src, VEX_SIMD_F2, true);
1735   } else {
1736     emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
1737   }
1738 }
1739 
1740 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1741   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1742   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, true);
1743   emit_int8(0x2A);
1744   emit_int8((unsigned char)(0xC0 | encode));
1745 }
1746 
1747 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1748   if (VM_Version::supports_evex()) {
1749     tuple_type = EVEX_T1S;
1750     input_size_in_bits = EVEX_32bit;
1751   }
1752   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1753   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, true);
1754 }
1755 
1756 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1757   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1758   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true);
1759   emit_int8(0x2A);
1760   emit_int8((unsigned char)(0xC0 | encode));
1761 }
1762 
1763 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1764   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1765   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1766 }
1767 
1768 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1769   if (VM_Version::supports_evex()) {
1770     tuple_type = EVEX_T1S;
1771     input_size_in_bits = EVEX_32bit;
1772   }
1773   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1774   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1775 }
1776 
1777 
1778 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1779   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1780   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true);
1781   emit_int8(0x2C);
1782   emit_int8((unsigned char)(0xC0 | encode));
1783 }
1784 
1785 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1786   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1787   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true);
1788   emit_int8(0x2C);
1789   emit_int8((unsigned char)(0xC0 | encode));
1790 }
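// The cvtt* conversions above truncate (round toward zero) instead of using
// the current MXCSR rounding mode; this matches Java's narrowing casts, e.g.
// cvttsd2si of 3.7 yields 3 and of -3.7 yields -3.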
1791 
1792 void Assembler::decl(Address dst) {
1793   // Don't use it directly. Use MacroAssembler::decrement() instead.
1794   InstructionMark im(this);
1795   prefix(dst);
1796   emit_int8((unsigned char)0xFF);
1797   emit_operand(rcx, dst);
1798 }
1799 
1800 void Assembler::divsd(XMMRegister dst, Address src) {
1801   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1802   if (VM_Version::supports_evex()) {
1803     tuple_type = EVEX_T1S;
1804     input_size_in_bits = EVEX_64bit;
1805     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1806   } else {
1807     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1808   }
1809 }
1810 
1811 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1812   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1813   if (VM_Version::supports_evex()) {
1814     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
1815   } else {
1816     emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1817   }
1818 }
1819 
1820 void Assembler::divss(XMMRegister dst, Address src) {
1821   if (VM_Version::supports_evex()) {
1822     tuple_type = EVEX_T1S;
1823     input_size_in_bits = EVEX_32bit;
1824   }
1825   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1826   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1827 }
1828 
1829 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1830   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1831   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1832 }
1833 
1834 void Assembler::emms() {
1835   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1836   emit_int8(0x0F);
1837   emit_int8(0x77);
1838 }
1839 
1840 void Assembler::hlt() {
1841   emit_int8((unsigned char)0xF4);
1842 }
1843 
1844 void Assembler::idivl(Register src) {
1845   int encode = prefix_and_encode(src->encoding());
1846   emit_int8((unsigned char)0xF7);
1847   emit_int8((unsigned char)(0xF8 | encode));
1848 }
1849 
1850 void Assembler::divl(Register src) { // Unsigned
1851   int encode = prefix_and_encode(src->encoding());
1852   emit_int8((unsigned char)0xF7);
1853   emit_int8((unsigned char)(0xF0 | encode));
1854 }
1855 
1856 void Assembler::imull(Register dst, Register src) {
1857   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1858   emit_int8(0x0F);
1859   emit_int8((unsigned char)0xAF);
1860   emit_int8((unsigned char)(0xC0 | encode));
1861 }
1862 
1863 
1864 void Assembler::imull(Register dst, Register src, int value) {
1865   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1866   if (is8bit(value)) {
1867     emit_int8(0x6B);
1868     emit_int8((unsigned char)(0xC0 | encode));
1869     emit_int8(value & 0xFF);
1870   } else {
1871     emit_int8(0x69);
1872     emit_int8((unsigned char)(0xC0 | encode));
1873     emit_int32(value);
1874   }
1875 }
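// Encoding sketch for the immediate form above: imull(rax, rbx, 5) takes the
// 8-bit branch (5 fits in a byte) and emits
//   6B C3 05              i.e.  imul eax, ebx, 5
// while a larger immediate such as 0x12345 takes the 32-bit branch and emits
//   69 C3 45 23 01 00     (imm32 in little-endian order)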
1876 
1877 void Assembler::imull(Register dst, Address src) {
1878   InstructionMark im(this);
1879   prefix(src, dst);
1880   emit_int8(0x0F);
1881   emit_int8((unsigned char) 0xAF);
1882   emit_operand(dst, src);
1883 }
1884 
1885 
1886 void Assembler::incl(Address dst) {
1887   // Don't use it directly. Use MacroAssembler::increment() instead.
1888   InstructionMark im(this);
1889   prefix(dst);
1890   emit_int8((unsigned char)0xFF);
1891   emit_operand(rax, dst);
1892 }
1893 
1894 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1895   InstructionMark im(this);
1896   assert((0 <= cc) && (cc < 16), "illegal cc");
1897   if (L.is_bound()) {
1898     address dst = target(L);
1899     assert(dst != NULL, "jcc most probably wrong");
1900 
1901     const int short_size = 2;
1902     const int long_size = 6;
1903     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
1904     if (maybe_short && is8bit(offs - short_size)) {
1905       // 0111 tttn #8-bit disp
1906       emit_int8(0x70 | cc);
1907       emit_int8((offs - short_size) & 0xFF);
1908     } else {
1909       // 0000 1111 1000 tttn #32-bit disp
1910       assert(is_simm32(offs - long_size),
1911              "must be 32bit offset (call4)");
1912       emit_int8(0x0F);
1913       emit_int8((unsigned char)(0x80 | cc));
1914       emit_int32(offs - long_size);
1915     }
1916   } else {
1917     // Note: we could eliminate conditional jumps to this jump if the condition
1918     //       is the same; however, that seems to be a rather unlikely case.
1919     // Note: use jccb() if the label to be bound is very close, to get
1920     //       an 8-bit displacement.
1921     L.add_patch_at(code(), locator());
1922     emit_int8(0x0F);
1923     emit_int8((unsigned char)(0x80 | cc));
1924     emit_int32(0);
1925   }
1926 }
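// Worked example (a sketch): for jcc(Assembler::equal, L) with L already bound
// 10 bytes ahead of the current pc and maybe_short == true, offs is 10, the
// short form is selected, and the bytes emitted are
//   74 08                 i.e.  je .+10
// (0x70 | equal(0x4) = 0x74; displacement = offs - short_size = 8).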
1927 
1928 void Assembler::jccb(Condition cc, Label& L) {
1929   if (L.is_bound()) {
1930     const int short_size = 2;
1931     address entry = target(L);
1932 #ifdef ASSERT
1933     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1934     intptr_t delta = short_branch_delta();
1935     if (delta != 0) {
1936       dist += (dist < 0 ? (-delta) :delta);
1937     }
1938     assert(is8bit(dist), "Displacement too large for a short jmp");
1939 #endif
1940     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
1941     // 0111 tttn #8-bit disp
1942     emit_int8(0x70 | cc);
1943     emit_int8((offs - short_size) & 0xFF);
1944   } else {
1945     InstructionMark im(this);
1946     L.add_patch_at(code(), locator());
1947     emit_int8(0x70 | cc);
1948     emit_int8(0);
1949   }
1950 }
1951 
1952 void Assembler::jmp(Address adr) {
1953   InstructionMark im(this);
1954   prefix(adr);
1955   emit_int8((unsigned char)0xFF);
1956   emit_operand(rsp, adr);
1957 }
1958 
1959 void Assembler::jmp(Label& L, bool maybe_short) {
1960   if (L.is_bound()) {
1961     address entry = target(L);
1962     assert(entry != NULL, "jmp most probably wrong");
1963     InstructionMark im(this);
1964     const int short_size = 2;
1965     const int long_size = 5;
1966     intptr_t offs = entry - pc();
1967     if (maybe_short && is8bit(offs - short_size)) {
1968       emit_int8((unsigned char)0xEB);
1969       emit_int8((offs - short_size) & 0xFF);
1970     } else {
1971       emit_int8((unsigned char)0xE9);
1972       emit_int32(offs - long_size);
1973     }
1974   } else {
1975     // By default, forward jumps are always 32-bit displacements, since
1976     // we can't yet know where the label will be bound.  If you're sure that
1977     // the forward jump will not run beyond 256 bytes, use jmpb to
1978     // force an 8-bit displacement.
1979     InstructionMark im(this);
1980     L.add_patch_at(code(), locator());
1981     emit_int8((unsigned char)0xE9);
1982     emit_int32(0);
1983   }
1984 }
1985 
1986 void Assembler::jmp(Register entry) {
1987   int encode = prefix_and_encode(entry->encoding());
1988   emit_int8((unsigned char)0xFF);
1989   emit_int8((unsigned char)(0xE0 | encode));
1990 }
1991 
1992 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
1993   InstructionMark im(this);
1994   emit_int8((unsigned char)0xE9);
1995   assert(dest != NULL, "must have a target");
1996   intptr_t disp = dest - (pc() + sizeof(int32_t));
1997   assert(is_simm32(disp), "must be 32bit offset (jmp)");
1998   emit_data(disp, rspec.reloc(), call32_operand);
1999 }
2000 
2001 void Assembler::jmpb(Label& L) {
2002   if (L.is_bound()) {
2003     const int short_size = 2;
2004     address entry = target(L);
2005     assert(entry != NULL, "jmp most probably wrong");
2006 #ifdef ASSERT
2007     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2008     intptr_t delta = short_branch_delta();
2009     if (delta != 0) {
2010       dist += (dist < 0 ? (-delta) :delta);
2011     }
2012     assert(is8bit(dist), "Displacement too large for a short jmp");
2013 #endif
2014     intptr_t offs = entry - pc();
2015     emit_int8((unsigned char)0xEB);
2016     emit_int8((offs - short_size) & 0xFF);
2017   } else {
2018     InstructionMark im(this);
2019     L.add_patch_at(code(), locator());
2020     emit_int8((unsigned char)0xEB);
2021     emit_int8(0);
2022   }
2023 }
2024 
2025 void Assembler::ldmxcsr( Address src) {
2026   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2027   InstructionMark im(this);
2028   prefix(src);
2029   emit_int8(0x0F);
2030   emit_int8((unsigned char)0xAE);
2031   emit_operand(as_Register(2), src);
2032 }
2033 
2034 void Assembler::leal(Register dst, Address src) {
2035   InstructionMark im(this);
2036 #ifdef _LP64
2037   emit_int8(0x67); // addr32
2038   prefix(src, dst);
2039 #endif // LP64
2040   emit_int8((unsigned char)0x8D);
2041   emit_operand(dst, src);
2042 }
2043 
2044 void Assembler::lfence() {
2045   emit_int8(0x0F);
2046   emit_int8((unsigned char)0xAE);
2047   emit_int8((unsigned char)0xE8);
2048 }
2049 
2050 void Assembler::lock() {
2051   emit_int8((unsigned char)0xF0);
2052 }
2053 
2054 void Assembler::lzcntl(Register dst, Register src) {
2055   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2056   emit_int8((unsigned char)0xF3);
2057   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2058   emit_int8(0x0F);
2059   emit_int8((unsigned char)0xBD);
2060   emit_int8((unsigned char)(0xC0 | encode));
2061 }
2062 
2063 // Emit mfence instruction
2064 void Assembler::mfence() {
2065   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2066   emit_int8(0x0F);
2067   emit_int8((unsigned char)0xAE);
2068   emit_int8((unsigned char)0xF0);
2069 }
2070 
2071 void Assembler::mov(Register dst, Register src) {
2072   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2073 }
2074 
2075 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2076   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2077   if (VM_Version::supports_evex()) {
2078     emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66, true);
2079   } else {
2080     emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
2081   }
2082 }
2083 
2084 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2085   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2086   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
2087 }
2088 
2089 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2090   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2091   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, true, VEX_OPCODE_0F,
2092                                       false, AVX_128bit);
2093   emit_int8(0x16);
2094   emit_int8((unsigned char)(0xC0 | encode));
2095 }
2096 
2097 void Assembler::movb(Register dst, Address src) {
2098   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2099   InstructionMark im(this);
2100   prefix(src, dst, true);
2101   emit_int8((unsigned char)0x8A);
2102   emit_operand(dst, src);
2103 }
2104 
2105 void Assembler::kmovq(KRegister dst, KRegister src) {
2106   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2107   int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE,
2108                                       true, VEX_OPCODE_0F, true);
2109   emit_int8((unsigned char)0x90);
2110   emit_int8((unsigned char)(0xC0 | encode));
2111 }
2112 
2113 void Assembler::kmovq(KRegister dst, Address src) {
2114   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2115   int dst_enc = dst->encoding();
2116   int nds_enc = 0;
2117   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE,
2118              VEX_OPCODE_0F, true, AVX_128bit, true, true);
2119   emit_int8((unsigned char)0x90);
2120   emit_operand((Register)dst, src);
2121 }
2122 
2123 void Assembler::kmovq(Address dst, KRegister src) {
2124   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2125   int src_enc = src->encoding();
2126   int nds_enc = 0;
2127   vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE,
2128              VEX_OPCODE_0F, true, AVX_128bit, true, true);
2129   emit_int8((unsigned char)0x90);
2130   emit_operand((Register)src, dst);
2131 }
2132 
2133 void Assembler::kmovql(KRegister dst, Register src) {
2134   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2135   bool supports_bw = VM_Version::supports_avx512bw();
2136   VexSimdPrefix pre = supports_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
2137   int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, true,
2138                                       VEX_OPCODE_0F, supports_bw);
2139   emit_int8((unsigned char)0x92);
2140   emit_int8((unsigned char)(0xC0 | encode));
2141 }
2142 
2143 void Assembler::kmovdl(KRegister dst, Register src) {
2144   NOT_LP64(assert(VM_Version::supports_evex(), ""));
2145   VexSimdPrefix pre = VM_Version::supports_avx512bw() ? VEX_SIMD_F2 : VEX_SIMD_NONE;
2146   int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, true, VEX_OPCODE_0F, false);
2147   emit_int8((unsigned char)0x92);
2148   emit_int8((unsigned char)(0xC0 | encode));
2149 }
2150 
2151 void Assembler::movb(Address dst, int imm8) {
2152   InstructionMark im(this);
2153   prefix(dst);
2154   emit_int8((unsigned char)0xC6);
2155   emit_operand(rax, dst, 1);
2156   emit_int8(imm8);
2157 }
2158 
2159 
2160 void Assembler::movb(Address dst, Register src) {
2161   assert(src->has_byte_register(), "must have byte register");
2162   InstructionMark im(this);
2163   prefix(dst, src, true);
2164   emit_int8((unsigned char)0x88);
2165   emit_operand(src, dst);
2166 }
2167 
2168 void Assembler::movdl(XMMRegister dst, Register src) {
2169   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2170   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, true);
2171   emit_int8(0x6E);
2172   emit_int8((unsigned char)(0xC0 | encode));
2173 }
2174 
2175 void Assembler::movdl(Register dst, XMMRegister src) {
2176   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2177   // swap src/dst to get correct prefix
2178   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, true);
2179   emit_int8(0x7E);
2180   emit_int8((unsigned char)(0xC0 | encode));
2181 }
2182 
2183 void Assembler::movdl(XMMRegister dst, Address src) {
2184   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2185   if (VM_Version::supports_evex()) {
2186     tuple_type = EVEX_T1S;
2187     input_size_in_bits = EVEX_32bit;
2188   }
2189   InstructionMark im(this);
2190   simd_prefix(dst, src, VEX_SIMD_66, true, VEX_OPCODE_0F);
2191   emit_int8(0x6E);
2192   emit_operand(dst, src);
2193 }
2194 
2195 void Assembler::movdl(Address dst, XMMRegister src) {
2196   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2197   if (VM_Version::supports_evex()) {
2198     tuple_type = EVEX_T1S;
2199     input_size_in_bits = EVEX_32bit;
2200   }
2201   InstructionMark im(this);
2202   simd_prefix(dst, src, VEX_SIMD_66, true);
2203   emit_int8(0x7E);
2204   emit_operand(src, dst);
2205 }
2206 
2207 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2208   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2209   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2210 }
2211 
2212 void Assembler::movdqa(XMMRegister dst, Address src) {
2213   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2214   if (VM_Version::supports_evex()) {
2215     tuple_type = EVEX_FVM;
2216   }
2217   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
2218 }
2219 
2220 void Assembler::movdqu(XMMRegister dst, Address src) {
2221   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2222   if (VM_Version::supports_evex()) {
2223     tuple_type = EVEX_FVM;
2224   }
2225   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2226 }
2227 
2228 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2229   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2230   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
2231 }
2232 
2233 void Assembler::movdqu(Address dst, XMMRegister src) {
2234   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2235   if (VM_Version::supports_evex()) {
2236     tuple_type = EVEX_FVM;
2237   }
2238   InstructionMark im(this);
2239   simd_prefix(dst, src, VEX_SIMD_F3, false);
2240   emit_int8(0x7F);
2241   emit_operand(src, dst);
2242 }
2243 
2244 // Move Unaligned 256-bit Vector
2245 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2246   assert(UseAVX > 0, "");
2247   if (VM_Version::supports_evex()) {
2248     tuple_type = EVEX_FVM;
2249   }
2250   int vector_len = AVX_256bit;
2251   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
2252   emit_int8(0x6F);
2253   emit_int8((unsigned char)(0xC0 | encode));
2254 }
2255 
2256 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2257   assert(UseAVX > 0, "");
2258   if (VM_Version::supports_evex()) {
2259     tuple_type = EVEX_FVM;
2260   }
2261   InstructionMark im(this);
2262   int vector_len = AVX_256bit;
2263   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
2264   emit_int8(0x6F);
2265   emit_operand(dst, src);
2266 }
2267 
2268 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2269   assert(UseAVX > 0, "");
2270   if (VM_Version::supports_evex()) {
2271     tuple_type = EVEX_FVM;
2272   }
2273   InstructionMark im(this);
2274   int vector_len = AVX_256bit;
2275   // swap src<->dst for encoding
2276   assert(src != xnoreg, "sanity");
2277   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
2278   emit_int8(0x7F);
2279   emit_operand(src, dst);
2280 }
2281 
2282 // Move Unaligned EVEX-enabled Vector (programmable element size: 8, 16, 32 or 64 bits)
2283 void Assembler::evmovdqu(XMMRegister dst, XMMRegister src, int vector_len) {
2284   assert(UseAVX > 0, "");
2285   int src_enc = src->encoding();
2286   int dst_enc = dst->encoding();
2287   int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
2288                                      true, vector_len, false, false);
2289   emit_int8(0x6F);
2290   emit_int8((unsigned char)(0xC0 | encode));
2291 }
2292 
2293 void Assembler::evmovdqu(XMMRegister dst, Address src, int vector_len) {
2294   assert(UseAVX > 0, "");
2295   InstructionMark im(this);
2296   if (VM_Version::supports_evex()) {
2297     tuple_type = EVEX_FVM;
2298     vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
2299   } else {
2300     vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
2301   }
2302   emit_int8(0x6F);
2303   emit_operand(dst, src);
2304 }
2305 
2306 void Assembler::evmovdqu(Address dst, XMMRegister src, int vector_len) {
2307   assert(UseAVX > 0, "");
2308   InstructionMark im(this);
2309   assert(src != xnoreg, "sanity");
2310   if (VM_Version::supports_evex()) {
2311     tuple_type = EVEX_FVM;
2312     // swap src<->dst for encoding
2313     vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
2314   } else {
2315     // swap src<->dst for encoding
2316     vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
2317   }
2318   emit_int8(0x7F);
2319   emit_operand(src, dst);
2320 }
2321 
2322 // Uses zero extension on 64-bit
2323 
2324 void Assembler::movl(Register dst, int32_t imm32) {
2325   int encode = prefix_and_encode(dst->encoding());
2326   emit_int8((unsigned char)(0xB8 | encode));
2327   emit_int32(imm32);
2328 }
2329 
2330 void Assembler::movl(Register dst, Register src) {
2331   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2332   emit_int8((unsigned char)0x8B);
2333   emit_int8((unsigned char)(0xC0 | encode));
2334 }
2335 
2336 void Assembler::movl(Register dst, Address src) {
2337   InstructionMark im(this);
2338   prefix(src, dst);
2339   emit_int8((unsigned char)0x8B);
2340   emit_operand(dst, src);
2341 }
2342 
2343 void Assembler::movl(Address dst, int32_t imm32) {
2344   InstructionMark im(this);
2345   prefix(dst);
2346   emit_int8((unsigned char)0xC7);
2347   emit_operand(rax, dst, 4);
2348   emit_int32(imm32);
2349 }
2350 
2351 void Assembler::movl(Address dst, Register src) {
2352   InstructionMark im(this);
2353   prefix(dst, src);
2354   emit_int8((unsigned char)0x89);
2355   emit_operand(src, dst);
2356 }
2357 
2358 // Newer CPUs require the use of movsd and movss to avoid a partial register stall
2359 // when loading from memory. But for the old Opteron, use movlpd instead of movsd.
2360 // The selection is done in MacroAssembler::movdbl() and movflt().
2361 void Assembler::movlpd(XMMRegister dst, Address src) {
2362   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2363   if (VM_Version::supports_evex()) {
2364     tuple_type = EVEX_T1S;
2365     input_size_in_bits = EVEX_32bit;
2366   }
2367   emit_simd_arith(0x12, dst, src, VEX_SIMD_66, true);
2368 }
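// A sketch of the selection mentioned above, under the assumption that it is
// keyed off the UseXmmLoadAndClearUpper flag (the real logic lives in
// MacroAssembler::movdbl(); treat the exact signature as illustrative):
//
//   void MacroAssembler::movdbl(XMMRegister dst, Address src) {
//     if (UseXmmLoadAndClearUpper) { movsd(dst, src);  }  // newer CPUs
//     else                         { movlpd(dst, src); }  // old Opteron
//   }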
2369 
2370 void Assembler::movq( MMXRegister dst, Address src ) {
2371   assert( VM_Version::supports_mmx(), "" );
2372   emit_int8(0x0F);
2373   emit_int8(0x6F);
2374   emit_operand(dst, src);
2375 }
2376 
2377 void Assembler::movq( Address dst, MMXRegister src ) {
2378   assert( VM_Version::supports_mmx(), "" );
2379   emit_int8(0x0F);
2380   emit_int8(0x7F);
2381   // workaround gcc (3.2.1-7a) bug
2382   // In that version of gcc, with only an emit_operand(MMX, Address) available,
2383   // gcc will tail jump and try to reverse the parameters, completely
2384   // obliterating dst in the process. By having a version available
2385   // that doesn't need to swap the args at the tail jump, the bug is
2386   // avoided.
2387   emit_operand(dst, src);
2388 }
2389 
2390 void Assembler::movq(XMMRegister dst, Address src) {
2391   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2392   InstructionMark im(this);
2393   if (VM_Version::supports_evex()) {
2394     tuple_type = EVEX_T1S;
2395     input_size_in_bits = EVEX_64bit;
2396     simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, true);
2397   } else {
2398     simd_prefix(dst, src, VEX_SIMD_F3, true, VEX_OPCODE_0F);
2399   }
2400   emit_int8(0x7E);
2401   emit_operand(dst, src);
2402 }
2403 
2404 void Assembler::movq(Address dst, XMMRegister src) {
2405   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2406   InstructionMark im(this);
2407   if (VM_Version::supports_evex()) {
2408     tuple_type = EVEX_T1S;
2409     input_size_in_bits = EVEX_64bit;
2410     simd_prefix(src, xnoreg, dst, VEX_SIMD_66, true,
2411                 VEX_OPCODE_0F, true, AVX_128bit);
2412   } else {
2413     simd_prefix(dst, src, VEX_SIMD_66, true);
2414   }
2415   emit_int8((unsigned char)0xD6);
2416   emit_operand(src, dst);
2417 }
2418 
2419 void Assembler::movsbl(Register dst, Address src) { // movsxb
2420   InstructionMark im(this);
2421   prefix(src, dst);
2422   emit_int8(0x0F);
2423   emit_int8((unsigned char)0xBE);
2424   emit_operand(dst, src);
2425 }
2426 
2427 void Assembler::movsbl(Register dst, Register src) { // movsxb
2428   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2429   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
2430   emit_int8(0x0F);
2431   emit_int8((unsigned char)0xBE);
2432   emit_int8((unsigned char)(0xC0 | encode));
2433 }
2434 
2435 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2436   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2437   if (VM_Version::supports_evex()) {
2438     emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, true);
2439   } else {
2440     emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
2441   }
2442 }
2443 
2444 void Assembler::movsd(XMMRegister dst, Address src) {
2445   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2446   if (VM_Version::supports_evex()) {
2447     tuple_type = EVEX_T1S;
2448     input_size_in_bits = EVEX_64bit;
2449     emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, true);
2450   } else {
2451     emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
2452   }
2453 }
2454 
2455 void Assembler::movsd(Address dst, XMMRegister src) {
2456   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2457   InstructionMark im(this);
2458   if (VM_Version::supports_evex()) {
2459     tuple_type = EVEX_T1S;
2460     input_size_in_bits = EVEX_64bit;
2461     simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2);
2462   } else {
2463     simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, false);
2464   }
2465   emit_int8(0x11);
2466   emit_operand(src, dst);
2467 }
2468 
2469 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2470   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2471   emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, true);
2472 }
2473 
2474 void Assembler::movss(XMMRegister dst, Address src) {
2475   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2476   if (VM_Version::supports_evex()) {
2477     tuple_type = EVEX_T1S;
2478     input_size_in_bits = EVEX_32bit;
2479   }
2480   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, true);
2481 }
2482 
2483 void Assembler::movss(Address dst, XMMRegister src) {
2484   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2485   if (VM_Version::supports_evex()) {
2486     tuple_type = EVEX_T1S;
2487     input_size_in_bits = EVEX_32bit;
2488   }
2489   InstructionMark im(this);
2490   simd_prefix(dst, src, VEX_SIMD_F3, false);
2491   emit_int8(0x11);
2492   emit_operand(src, dst);
2493 }
2494 
2495 void Assembler::movswl(Register dst, Address src) { // movsxw
2496   InstructionMark im(this);
2497   prefix(src, dst);
2498   emit_int8(0x0F);
2499   emit_int8((unsigned char)0xBF);
2500   emit_operand(dst, src);
2501 }
2502 
2503 void Assembler::movswl(Register dst, Register src) { // movsxw
2504   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2505   emit_int8(0x0F);
2506   emit_int8((unsigned char)0xBF);
2507   emit_int8((unsigned char)(0xC0 | encode));
2508 }
2509 
2510 void Assembler::movw(Address dst, int imm16) {
2511   InstructionMark im(this);
2512 
2513   emit_int8(0x66); // operand-size prefix: 16-bit operand
2514   prefix(dst);
2515   emit_int8((unsigned char)0xC7);
2516   emit_operand(rax, dst, 2);
2517   emit_int16(imm16);
2518 }
2519 
2520 void Assembler::movw(Register dst, Address src) {
2521   InstructionMark im(this);
2522   emit_int8(0x66);
2523   prefix(src, dst);
2524   emit_int8((unsigned char)0x8B);
2525   emit_operand(dst, src);
2526 }
2527 
2528 void Assembler::movw(Address dst, Register src) {
2529   InstructionMark im(this);
2530   emit_int8(0x66);
2531   prefix(dst, src);
2532   emit_int8((unsigned char)0x89);
2533   emit_operand(src, dst);
2534 }
2535 
2536 void Assembler::movzbl(Register dst, Address src) { // movzxb
2537   InstructionMark im(this);
2538   prefix(src, dst);
2539   emit_int8(0x0F);
2540   emit_int8((unsigned char)0xB6);
2541   emit_operand(dst, src);
2542 }
2543 
2544 void Assembler::movzbl(Register dst, Register src) { // movzxb
2545   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2546   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
2547   emit_int8(0x0F);
2548   emit_int8((unsigned char)0xB6);
2549   emit_int8((unsigned char)(0xC0 | encode));
2550 }
2551 
2552 void Assembler::movzwl(Register dst, Address src) { // movzxw
2553   InstructionMark im(this);
2554   prefix(src, dst);
2555   emit_int8(0x0F);
2556   emit_int8((unsigned char)0xB7);
2557   emit_operand(dst, src);
2558 }
2559 
2560 void Assembler::movzwl(Register dst, Register src) { // movzxw
2561   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2562   emit_int8(0x0F);
2563   emit_int8((unsigned char)0xB7);
2564   emit_int8((unsigned char)(0xC0 | encode));
2565 }
2566 
2567 void Assembler::mull(Address src) {
2568   InstructionMark im(this);
2569   prefix(src);
2570   emit_int8((unsigned char)0xF7);
2571   emit_operand(rsp, src);
2572 }
2573 
2574 void Assembler::mull(Register src) {
2575   int encode = prefix_and_encode(src->encoding());
2576   emit_int8((unsigned char)0xF7);
2577   emit_int8((unsigned char)(0xE0 | encode));
2578 }
2579 
2580 void Assembler::mulsd(XMMRegister dst, Address src) {
2581   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2582   if (VM_Version::supports_evex()) {
2583     tuple_type = EVEX_T1S;
2584     input_size_in_bits = EVEX_64bit;
2585     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2586   } else {
2587     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2588   }
2589 }
2590 
2591 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2592   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2593   if (VM_Version::supports_evex()) {
2594     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
2595   } else {
2596     emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
2597   }
2598 }
2599 
2600 void Assembler::mulss(XMMRegister dst, Address src) {
2601   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2602   if (VM_Version::supports_evex()) {
2603     tuple_type = EVEX_T1S;
2604     input_size_in_bits = EVEX_32bit;
2605   }
2606   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2607 }
2608 
2609 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2610   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2611   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
2612 }
2613 
2614 void Assembler::negl(Register dst) {
2615   int encode = prefix_and_encode(dst->encoding());
2616   emit_int8((unsigned char)0xF7);
2617   emit_int8((unsigned char)(0xD8 | encode));
2618 }
2619 
2620 void Assembler::nop(int i) {
2621 #ifdef ASSERT
2622   assert(i > 0, " ");
2623   // The fancy nops aren't currently recognized by debuggers, making it a
2624   // pain to disassemble code while debugging. If asserts are on, clearly
2625   // speed is not an issue, so simply use the traditional single-byte nop
2626   // to do alignment.
2627 
2628   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2629   return;
2630 
2631 #endif // ASSERT
2632 
2633   if (UseAddressNop && VM_Version::is_intel()) {
2634     //
2635     // Using multi-byte nops "0x0F 0x1F [address]" for Intel
2636     //  1: 0x90
2637     //  2: 0x66 0x90
2638     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2639     //  4: 0x0F 0x1F 0x40 0x00
2640     //  5: 0x0F 0x1F 0x44 0x00 0x00
2641     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2642     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2643     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2644     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2645     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2646     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2647 
2648     // The rest of the encoding is Intel-specific - don't use consecutive address nops
2649 
2650     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2651     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2652     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2653     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2654 
2655     while(i >= 15) {
2656       // For Intel don't generate consecutive address nops (mix with regular nops)
2657       i -= 15;
2658       emit_int8(0x66);   // size prefix
2659       emit_int8(0x66);   // size prefix
2660       emit_int8(0x66);   // size prefix
2661       addr_nop_8();
2662       emit_int8(0x66);   // size prefix
2663       emit_int8(0x66);   // size prefix
2664       emit_int8(0x66);   // size prefix
2665       emit_int8((unsigned char)0x90);
2666                          // nop
2667     }
2668     switch (i) {
2669       case 14:
2670         emit_int8(0x66); // size prefix
2671       case 13:
2672         emit_int8(0x66); // size prefix
2673       case 12:
2674         addr_nop_8();
2675         emit_int8(0x66); // size prefix
2676         emit_int8(0x66); // size prefix
2677         emit_int8(0x66); // size prefix
2678         emit_int8((unsigned char)0x90);
2679                          // nop
2680         break;
2681       case 11:
2682         emit_int8(0x66); // size prefix
2683       case 10:
2684         emit_int8(0x66); // size prefix
2685       case 9:
2686         emit_int8(0x66); // size prefix
2687       case 8:
2688         addr_nop_8();
2689         break;
2690       case 7:
2691         addr_nop_7();
2692         break;
2693       case 6:
2694         emit_int8(0x66); // size prefix
2695       case 5:
2696         addr_nop_5();
2697         break;
2698       case 4:
2699         addr_nop_4();
2700         break;
2701       case 3:
2702         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2703         emit_int8(0x66); // size prefix
2704       case 2:
2705         emit_int8(0x66); // size prefix
2706       case 1:
2707         emit_int8((unsigned char)0x90);
2708                          // nop
2709         break;
2710       default:
2711         assert(i == 0, " ");
2712     }
2713     return;
2714   }
2715   if (UseAddressNop && VM_Version::is_amd()) {
2716     //
2717     // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
2718     //  1: 0x90
2719     //  2: 0x66 0x90
2720     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2721     //  4: 0x0F 0x1F 0x40 0x00
2722     //  5: 0x0F 0x1F 0x44 0x00 0x00
2723     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2724     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2725     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2726     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2727     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2728     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2729 
2730     // The rest of the encoding is AMD-specific - use consecutive address nops
2731 
2732     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2733     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2734     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2735     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2736     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2737     //     Size prefixes (0x66) are added for larger sizes
2738 
2739     while(i >= 22) {
2740       i -= 11;
2741       emit_int8(0x66); // size prefix
2742       emit_int8(0x66); // size prefix
2743       emit_int8(0x66); // size prefix
2744       addr_nop_8();
2745     }
2746     // Generate the first nop for sizes from 21 down to 12
2747     switch (i) {
2748       case 21:
2749         i -= 1;
2750         emit_int8(0x66); // size prefix
2751       case 20:
2752       case 19:
2753         i -= 1;
2754         emit_int8(0x66); // size prefix
2755       case 18:
2756       case 17:
2757         i -= 1;
2758         emit_int8(0x66); // size prefix
2759       case 16:
2760       case 15:
2761         i -= 8;
2762         addr_nop_8();
2763         break;
2764       case 14:
2765       case 13:
2766         i -= 7;
2767         addr_nop_7();
2768         break;
2769       case 12:
2770         i -= 6;
2771         emit_int8(0x66); // size prefix
2772         addr_nop_5();
2773         break;
2774       default:
2775         assert(i < 12, " ");
2776     }
2777 
2778     // Generate the second nop for sizes from 11 down to 1
2779     switch (i) {
2780       case 11:
2781         emit_int8(0x66); // size prefix
2782       case 10:
2783         emit_int8(0x66); // size prefix
2784       case 9:
2785         emit_int8(0x66); // size prefix
2786       case 8:
2787         addr_nop_8();
2788         break;
2789       case 7:
2790         addr_nop_7();
2791         break;
2792       case 6:
2793         emit_int8(0x66); // size prefix
2794       case 5:
2795         addr_nop_5();
2796         break;
2797       case 4:
2798         addr_nop_4();
2799         break;
2800       case 3:
2801         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2802         emit_int8(0x66); // size prefix
2803       case 2:
2804         emit_int8(0x66); // size prefix
2805       case 1:
2806         emit_int8((unsigned char)0x90);
2807                          // nop
2808         break;
2809       default:
2810         assert(i == 0, " ");
2811     }
2812     return;
2813   }
2814 
2815   // Using nops with size prefixes "0x66 0x90".
2816   // From AMD Optimization Guide:
2817   //  1: 0x90
2818   //  2: 0x66 0x90
2819   //  3: 0x66 0x66 0x90
2820   //  4: 0x66 0x66 0x66 0x90
2821   //  5: 0x66 0x66 0x90 0x66 0x90
2822   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
2823   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2824   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2825   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2826   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2827   //
2828   while(i > 12) {
2829     i -= 4;
2830     emit_int8(0x66); // size prefix
2831     emit_int8(0x66);
2832     emit_int8(0x66);
2833     emit_int8((unsigned char)0x90);
2834                      // nop
2835   }
2836   // 1 - 12 nops
2837   if(i > 8) {
2838     if(i > 9) {
2839       i -= 1;
2840       emit_int8(0x66);
2841     }
2842     i -= 3;
2843     emit_int8(0x66);
2844     emit_int8(0x66);
2845     emit_int8((unsigned char)0x90);
2846   }
2847   // 1 - 8 nops
2848   if(i > 4) {
2849     if(i > 6) {
2850       i -= 1;
2851       emit_int8(0x66);
2852     }
2853     i -= 3;
2854     emit_int8(0x66);
2855     emit_int8(0x66);
2856     emit_int8((unsigned char)0x90);
2857   }
2858   switch (i) {
2859     case 4:
2860       emit_int8(0x66);
2861     case 3:
2862       emit_int8(0x66);
2863     case 2:
2864       emit_int8(0x66);
2865     case 1:
2866       emit_int8((unsigned char)0x90);
2867       break;
2868     default:
2869       assert(i == 0, " ");
2870   }
2871 }
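// Worked example (a sketch): with UseAddressNop set on an Intel CPU, nop(7)
// reaches the "case 7" arm and emits the single 7-byte address nop
//   0F 1F 80 00 00 00 00
// exactly as listed in the size table above; without UseAddressNop the same
// request falls through to the 0x66-prefixed sequence 66 66 66 90 66 66 90.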
2872 
2873 void Assembler::notl(Register dst) {
2874   int encode = prefix_and_encode(dst->encoding());
2875   emit_int8((unsigned char)0xF7);
2876   emit_int8((unsigned char)(0xD0 | encode));
2877 }
2878 
2879 void Assembler::orl(Address dst, int32_t imm32) {
2880   InstructionMark im(this);
2881   prefix(dst);
2882   emit_arith_operand(0x81, rcx, dst, imm32);
2883 }
2884 
2885 void Assembler::orl(Register dst, int32_t imm32) {
2886   prefix(dst);
2887   emit_arith(0x81, 0xC8, dst, imm32);
2888 }
2889 
2890 void Assembler::orl(Register dst, Address src) {
2891   InstructionMark im(this);
2892   prefix(src, dst);
2893   emit_int8(0x0B);
2894   emit_operand(dst, src);
2895 }
2896 
2897 void Assembler::orl(Register dst, Register src) {
2898   (void) prefix_and_encode(dst->encoding(), src->encoding());
2899   emit_arith(0x0B, 0xC0, dst, src);
2900 }
2901 
2902 void Assembler::orl(Address dst, Register src) {
2903   InstructionMark im(this);
2904   prefix(dst, src);
2905   emit_int8(0x09);
2906   emit_operand(src, dst);
2907 }
2908 
2909 void Assembler::packuswb(XMMRegister dst, Address src) {
2910   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2911   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2912   if (VM_Version::supports_evex()) {
2913     tuple_type = EVEX_FV;
2914     input_size_in_bits = EVEX_32bit;
2915   }
2916   emit_simd_arith(0x67, dst, src, VEX_SIMD_66,
2917                   false, (VM_Version::supports_avx512dq() == false));
2918 }
2919 
2920 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2921   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2922   emit_simd_arith(0x67, dst, src, VEX_SIMD_66,
2923                   false, (VM_Version::supports_avx512dq() == false));
2924 }
2925 
2926 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
2927   assert(UseAVX > 0, "some form of AVX must be enabled");
2928   emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len,
2929                  false, (VM_Version::supports_avx512dq() == false));
2930 }
2931 
2932 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
2933   assert(VM_Version::supports_avx2(), "");
2934   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false,
2935                                       VEX_OPCODE_0F_3A, true, vector_len);
2936   emit_int8(0x00);
2937   emit_int8((unsigned char)(0xC0 | encode));
2938   emit_int8(imm8);
2939 }
2940 
2941 void Assembler::pause() {
2942   emit_int8((unsigned char)0xF3);
2943   emit_int8((unsigned char)0x90);
2944 }
2945 
2946 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2947   assert(VM_Version::supports_sse4_2(), "");
2948   InstructionMark im(this);
2949   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_3A,
2950               false, AVX_128bit, true);
2951   emit_int8(0x61);
2952   emit_operand(dst, src);
2953   emit_int8(imm8);
2954 }
2955 
2956 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2957   assert(VM_Version::supports_sse4_2(), "");
2958   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false,
2959                                       VEX_OPCODE_0F_3A, false, AVX_128bit, true);
2960   emit_int8(0x61);
2961   emit_int8((unsigned char)(0xC0 | encode));
2962   emit_int8(imm8);
2963 }
2964 
2965 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
2966   assert(VM_Version::supports_sse4_1(), "");
2967   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
2968                                       false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
2969   emit_int8(0x16);
2970   emit_int8((unsigned char)(0xC0 | encode));
2971   emit_int8(imm8);
2972 }
2973 
2974 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
2975   assert(VM_Version::supports_sse4_1(), "");
2976   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
2977                                       false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
2978   emit_int8(0x16);
2979   emit_int8((unsigned char)(0xC0 | encode));
2980   emit_int8(imm8);
2981 }
2982 
2983 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
2984   assert(VM_Version::supports_sse4_1(), "");
2985   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
2986                                       false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
2987   emit_int8(0x22);
2988   emit_int8((unsigned char)(0xC0 | encode));
2989   emit_int8(imm8);
2990 }
2991 
2992 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
2993   assert(VM_Version::supports_sse4_1(), "");
2994   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
2995                                       false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
2996   emit_int8(0x22);
2997   emit_int8((unsigned char)(0xC0 | encode));
2998   emit_int8(imm8);
2999 }
3000 
3001 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
3002   assert(VM_Version::supports_sse4_1(), "");
3003   if (VM_Version::supports_evex()) {
3004     tuple_type = EVEX_HVM;
3005   }
3006   InstructionMark im(this);
3007   simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
3008   emit_int8(0x30);
3009   emit_operand(dst, src);
3010 }
3011 
3012 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
3013   assert(VM_Version::supports_sse4_1(), "");
3014   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
3015   emit_int8(0x30);
3016   emit_int8((unsigned char)(0xC0 | encode));
3017 }
3018 
3019 // generic
3020 void Assembler::pop(Register dst) {
3021   int encode = prefix_and_encode(dst->encoding());
3022   emit_int8(0x58 | encode);
3023 }
3024 
3025 void Assembler::popcntl(Register dst, Address src) {
3026   assert(VM_Version::supports_popcnt(), "must support");
3027   InstructionMark im(this);
3028   emit_int8((unsigned char)0xF3);
3029   prefix(src, dst);
3030   emit_int8(0x0F);
3031   emit_int8((unsigned char)0xB8);
3032   emit_operand(dst, src);
3033 }
3034 
3035 void Assembler::popcntl(Register dst, Register src) {
3036   assert(VM_Version::supports_popcnt(), "must support");
3037   emit_int8((unsigned char)0xF3);
3038   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3039   emit_int8(0x0F);
3040   emit_int8((unsigned char)0xB8);
3041   emit_int8((unsigned char)(0xC0 | encode));
3042 }
3043 
3044 void Assembler::popf() {
3045   emit_int8((unsigned char)0x9D);
3046 }
3047 
3048 #ifndef _LP64 // no 32bit push/pop on amd64
3049 void Assembler::popl(Address dst) {
3050   // NOTE: this will adjust the stack by 8 bytes on 64-bit platforms
3051   InstructionMark im(this);
3052   prefix(dst);
3053   emit_int8((unsigned char)0x8F);
3054   emit_operand(rax, dst);
3055 }
3056 #endif
3057 
3058 void Assembler::prefetch_prefix(Address src) {
3059   prefix(src);
3060   emit_int8(0x0F);
3061 }
3062 
3063 void Assembler::prefetchnta(Address src) {
3064   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3065   InstructionMark im(this);
3066   prefetch_prefix(src);
3067   emit_int8(0x18);
3068   emit_operand(rax, src); // 0, src
3069 }
3070 
3071 void Assembler::prefetchr(Address src) {
3072   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3073   InstructionMark im(this);
3074   prefetch_prefix(src);
3075   emit_int8(0x0D);
3076   emit_operand(rax, src); // 0, src
3077 }
3078 
3079 void Assembler::prefetcht0(Address src) {
3080   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3081   InstructionMark im(this);
3082   prefetch_prefix(src);
3083   emit_int8(0x18);
3084   emit_operand(rcx, src); // 1, src
3085 }
3086 
3087 void Assembler::prefetcht1(Address src) {
3088   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3089   InstructionMark im(this);
3090   prefetch_prefix(src);
3091   emit_int8(0x18);
3092   emit_operand(rdx, src); // 2, src
3093 }
3094 
3095 void Assembler::prefetcht2(Address src) {
3096   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3097   InstructionMark im(this);
3098   prefetch_prefix(src);
3099   emit_int8(0x18);
3100   emit_operand(rbx, src); // 3, src
3101 }
3102 
3103 void Assembler::prefetchw(Address src) {
3104   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3105   InstructionMark im(this);
3106   prefetch_prefix(src);
3107   emit_int8(0x0D);
3108   emit_operand(rcx, src); // 1, src
3109 }
3110 
3111 void Assembler::prefix(Prefix p) {
3112   emit_int8(p);
3113 }
3114 
3115 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
3116   assert(VM_Version::supports_ssse3(), "");
3117   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38,
3118                                       false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
3119   emit_int8(0x00);
3120   emit_int8((unsigned char)(0xC0 | encode));
3121 }
3122 
3123 void Assembler::pshufb(XMMRegister dst, Address src) {
3124   assert(VM_Version::supports_ssse3(), "");
3125   if (VM_Version::supports_evex()) {
3126     tuple_type = EVEX_FVM;
3127   }
3128   InstructionMark im(this);
3129   simd_prefix(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38,
3130               false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
3131   emit_int8(0x00);
3132   emit_operand(dst, src);
3133 }
3134 
3135 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
3136   assert(isByte(mode), "invalid value");
3137   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3138   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
3139   emit_int8(mode & 0xFF);
3140 
3141 }
3142 
3143 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
3144   assert(isByte(mode), "invalid value");
3145   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3146   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3147   if (VM_Version::supports_evex()) {
3148     tuple_type = EVEX_FV;
3149     input_size_in_bits = EVEX_32bit;
3150   }
3151   InstructionMark im(this);
3152   simd_prefix(dst, src, VEX_SIMD_66, false);
3153   emit_int8(0x70);
3154   emit_operand(dst, src);
3155   emit_int8(mode & 0xFF);
3156 }
3157 
3158 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
3159   assert(isByte(mode), "invalid value");
3160   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3161   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, false,
3162                         (VM_Version::supports_avx512bw() == false));
3163   emit_int8(mode & 0xFF);
3164 }
3165 
3166 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
3167   assert(isByte(mode), "invalid value");
3168   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3169   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3170   if (VM_Version::supports_evex()) {
3171     tuple_type = EVEX_FVM;
3172   }
3173   InstructionMark im(this);
3174   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, false, VEX_OPCODE_0F,
3175               false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
3176   emit_int8(0x70);
3177   emit_operand(dst, src);
3178   emit_int8(mode & 0xFF);
3179 }
3180 
3181 void Assembler::psrldq(XMMRegister dst, int shift) {
3182   // Shift right the 128-bit value in an xmm register by the given number of bytes.
3183   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3184   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
3185   emit_int8(0x73);
3186   emit_int8((unsigned char)(0xC0 | encode));
3187   emit_int8(shift);
3188 }
3189 
3190 void Assembler::pslldq(XMMRegister dst, int shift) {
3191   // Shift left the 128-bit value in an xmm register by the given number of bytes.
3192   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3193   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
3194   emit_int8(0x73);
3195   emit_int8((unsigned char)(0xC0 | encode));
3196   emit_int8(shift);
3197 }
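// Note on the xmm3/xmm7 arguments above: psrldq and pslldq are both encoded
// as 66 0F 73 with an opcode extension in the ModRM reg field (/3 for the
// byte shift right, /7 for the byte shift left), so the register passed first
// to simd_prefix_and_encode() merely supplies that extension value; it is not
// a data operand.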
3198 
3199 void Assembler::ptest(XMMRegister dst, Address src) {
3200   assert(VM_Version::supports_sse4_1(), "");
3201   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3202   InstructionMark im(this);
3203   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, false,
3204               VEX_OPCODE_0F_38, false, AVX_128bit, true);
3205   emit_int8(0x17);
3206   emit_operand(dst, src);
3207 }
3208 
3209 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
3210   assert(VM_Version::supports_sse4_1(), "");
3211   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false,
3212                                       VEX_OPCODE_0F_38, false, AVX_128bit, true);
3213   emit_int8(0x17);
3214   emit_int8((unsigned char)(0xC0 | encode));
3215 }
3216 
3217 void Assembler::vptest(XMMRegister dst, Address src) {
3218   assert(VM_Version::supports_avx(), "");
3219   InstructionMark im(this);
3220   int vector_len = AVX_256bit;
3221   assert(dst != xnoreg, "sanity");
3222   int dst_enc = dst->encoding();
3223   // swap src<->dst for encoding
3224   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len, true, false);
3225   emit_int8(0x17);
3226   emit_operand(dst, src);
3227 }
3228 
3229 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
3230   assert(VM_Version::supports_avx(), "");
3231   int vector_len = AVX_256bit;
3232   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
3233                                      vector_len, VEX_OPCODE_0F_38, true, false);
3234   emit_int8(0x17);
3235   emit_int8((unsigned char)(0xC0 | encode));
3236 }
3237 
3238 void Assembler::punpcklbw(XMMRegister dst, Address src) {
3239   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3240   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3241   if (VM_Version::supports_evex()) {
3242     tuple_type = EVEX_FVM;
3243   }
3244   emit_simd_arith(0x60, dst, src, VEX_SIMD_66, false, (VM_Version::supports_avx512vlbw() == false));
3245 }
3246 
3247 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
3248   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3249   emit_simd_arith(0x60, dst, src, VEX_SIMD_66, false, (VM_Version::supports_avx512vlbw() == false));
3250 }
3251 
3252 void Assembler::punpckldq(XMMRegister dst, Address src) {
3253   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3254   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
3255   if (VM_Version::supports_evex()) {
3256     tuple_type = EVEX_FV;
3257     input_size_in_bits = EVEX_32bit;
3258   }
3259   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3260 }
3261 
3262 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
3263   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3264   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
3265 }
3266 
3267 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
3268   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3269   emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
3270 }
3271 
3272 void Assembler::push(int32_t imm32) {
3273   // in 64-bit mode this pushes 64 bits onto the stack but only
3274   // takes a 32-bit immediate
3275   emit_int8(0x68);
3276   emit_int32(imm32);
3277 }
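
     // For illustration, push(0x12345678) emits 68 78 56 34 12; in 64-bit
     // mode the immediate is sign-extended to 64 bits and rsp drops by 8.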
3278 
3279 void Assembler::push(Register src) {
3280   int encode = prefix_and_encode(src->encoding());
3281 
3282   emit_int8(0x50 | encode);
3283 }
3284 
3285 void Assembler::pushf() {
3286   emit_int8((unsigned char)0x9C);
3287 }
3288 
3289 #ifndef _LP64 // no 32bit push/pop on amd64
3290 void Assembler::pushl(Address src) {
3291   // 32-bit only (guarded above); pushes the 32-bit value at src
3292   InstructionMark im(this);
3293   prefix(src);
3294   emit_int8((unsigned char)0xFF);
3295   emit_operand(rsi, src);
3296 }
3297 #endif
3298 
3299 void Assembler::rcll(Register dst, int imm8) {
3300   assert(isShiftCount(imm8), "illegal shift count");
3301   int encode = prefix_and_encode(dst->encoding());
3302   if (imm8 == 1) {
3303     emit_int8((unsigned char)0xD1);
3304     emit_int8((unsigned char)(0xD0 | encode));
3305   } else {
3306     emit_int8((unsigned char)0xC1);
3307     emit_int8((unsigned char)(0xD0 | encode));
3308     emit_int8(imm8);
3309   }
3310 }
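
     // For illustration, rcll(rcx, 1) emits D1 D1, while rcll(rcx, 4)
     // emits C1 D1 04 (ModRM = 0xC0 | 2 << 3 | reg for the /2 form).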
3311 
3312 void Assembler::rdtsc() {
3313   emit_int8((unsigned char)0x0F);
3314   emit_int8((unsigned char)0x31);
3315 }
3316 
3317 // copies data from [esi] to [edi] using rcx pointer-sized words
3318 // generic
3319 void Assembler::rep_mov() {
3320   emit_int8((unsigned char)0xF3);
3321   // MOVSQ
3322   LP64_ONLY(prefix(REX_W));
3323   emit_int8((unsigned char)0xA5);
3324 }
3325 
3326 // sets rcx bytes starting at [edi] to the value in al
3327 void Assembler::rep_stosb() {
3328   emit_int8((unsigned char)0xF3); // REP
3329   LP64_ONLY(prefix(REX_W));
3330   emit_int8((unsigned char)0xAA); // STOSB
3331 }
3332 
3333 // sets rcx pointer-sized words starting at [edi] to the value in rax
3334 // generic
3335 void Assembler::rep_stos() {
3336   emit_int8((unsigned char)0xF3); // REP
3337   LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
3338   emit_int8((unsigned char)0xAB);
3339 }
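
     // A sketch of the semantics, assuming the direction flag is clear:
     //   while (rcx-- != 0) { *(intptr_t*)rdi = rax; rdi += wordSize; }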
3340 
3341 // scans rcx pointer-sized words at [edi] for an occurrence of rax
3342 // generic
3343 void Assembler::repne_scan() {
3344   emit_int8((unsigned char)0xF2);
3345   // SCASQ
3346   LP64_ONLY(prefix(REX_W));
3347   emit_int8((unsigned char)0xAF);
3348 }
3349 
3350 #ifdef _LP64
3351 // scans rcx 4-byte words at [edi] for an occurrence of rax
3352 // generic
3353 void Assembler::repne_scanl() {
3354   emit_int8((unsigned char)0xF2);
3355   // SCASL
3356   emit_int8((unsigned char)0xAF);
3357 }
3358 #endif
3359 
3360 void Assembler::ret(int imm16) {
3361   if (imm16 == 0) {
3362     emit_int8((unsigned char)0xC3);
3363   } else {
3364     emit_int8((unsigned char)0xC2);
3365     emit_int16(imm16);
3366   }
3367 }
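
     // For illustration, ret() emits C3 while ret(16) emits C2 10 00;
     // the imm16 form additionally pops 16 bytes of arguments.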
3368 
3369 void Assembler::sahf() {
3370 #ifdef _LP64
3371   // Not supported in 64bit mode
3372   ShouldNotReachHere();
3373 #endif
3374   emit_int8((unsigned char)0x9E);
3375 }
3376 
3377 void Assembler::sarl(Register dst, int imm8) {
3378   int encode = prefix_and_encode(dst->encoding());
3379   assert(isShiftCount(imm8), "illegal shift count");
3380   if (imm8 == 1) {
3381     emit_int8((unsigned char)0xD1);
3382     emit_int8((unsigned char)(0xF8 | encode));
3383   } else {
3384     emit_int8((unsigned char)0xC1);
3385     emit_int8((unsigned char)(0xF8 | encode));
3386     emit_int8(imm8);
3387   }
3388 }
3389 
3390 void Assembler::sarl(Register dst) {
3391   int encode = prefix_and_encode(dst->encoding());
3392   emit_int8((unsigned char)0xD3);
3393   emit_int8((unsigned char)(0xF8 | encode));
3394 }
3395 
3396 void Assembler::sbbl(Address dst, int32_t imm32) {
3397   InstructionMark im(this);
3398   prefix(dst);
3399   emit_arith_operand(0x81, rbx, dst, imm32);
3400 }
3401 
3402 void Assembler::sbbl(Register dst, int32_t imm32) {
3403   prefix(dst);
3404   emit_arith(0x81, 0xD8, dst, imm32);
3405 }
3406 
3407 
3408 void Assembler::sbbl(Register dst, Address src) {
3409   InstructionMark im(this);
3410   prefix(src, dst);
3411   emit_int8(0x1B);
3412   emit_operand(dst, src);
3413 }
3414 
3415 void Assembler::sbbl(Register dst, Register src) {
3416   (void) prefix_and_encode(dst->encoding(), src->encoding());
3417   emit_arith(0x1B, 0xC0, dst, src);
3418 }
3419 
3420 void Assembler::setb(Condition cc, Register dst) {
3421   assert(0 <= cc && cc < 16, "illegal cc");
3422   int encode = prefix_and_encode(dst->encoding(), true);
3423   emit_int8(0x0F);
3424   emit_int8((unsigned char)(0x90 | cc));
3425   emit_int8((unsigned char)(0xC0 | encode));
3426 }
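
     // For illustration, setb(Assembler::zero, rax) emits 0F 94 C0 (setz al).
     // The 'true' byte-instruction flag asks prefix_and_encode for a REX
     // prefix when spl/bpl/sil/dil would otherwise be unreachable (x86_64).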
3427 
3428 void Assembler::shll(Register dst, int imm8) {
3429   assert(isShiftCount(imm8), "illegal shift count");
3430   int encode = prefix_and_encode(dst->encoding());
3431   if (imm8 == 1 ) {
3432     emit_int8((unsigned char)0xD1);
3433     emit_int8((unsigned char)(0xE0 | encode));
3434   } else {
3435     emit_int8((unsigned char)0xC1);
3436     emit_int8((unsigned char)(0xE0 | encode));
3437     emit_int8(imm8);
3438   }
3439 }
3440 
3441 void Assembler::shll(Register dst) {
3442   int encode = prefix_and_encode(dst->encoding());
3443   emit_int8((unsigned char)0xD3);
3444   emit_int8((unsigned char)(0xE0 | encode));
3445 }
3446 
3447 void Assembler::shrl(Register dst, int imm8) {
3448   assert(isShiftCount(imm8), "illegal shift count");
3449   int encode = prefix_and_encode(dst->encoding());
3450   emit_int8((unsigned char)0xC1);
3451   emit_int8((unsigned char)(0xE8 | encode));
3452   emit_int8(imm8);
3453 }
3454 
3455 void Assembler::shrl(Register dst) {
3456   int encode = prefix_and_encode(dst->encoding());
3457   emit_int8((unsigned char)0xD3);
3458   emit_int8((unsigned char)(0xE8 | encode));
3459 }
3460 
3461 // copies a single word from [esi] to [edi]
3462 void Assembler::smovl() {
3463   emit_int8((unsigned char)0xA5);
3464 }
3465 
3466 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
3467   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3468   if (VM_Version::supports_evex()) {
3469     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3470   } else {
3471     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3472   }
3473 }
3474 
3475 void Assembler::sqrtsd(XMMRegister dst, Address src) {
3476   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3477   if (VM_Version::supports_evex()) {
3478     tuple_type = EVEX_T1S;
3479     input_size_in_bits = EVEX_64bit;
3480     emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
3481   } else {
3482     emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
3483   }
3484 }
3485 
3486 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
3487   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3488   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
3489 }
3490 
3491 void Assembler::std() {
3492   emit_int8((unsigned char)0xFD);
3493 }
3494 
3495 void Assembler::sqrtss(XMMRegister dst, Address src) {
3496   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3497   if (VM_Version::supports_evex()) {
3498     tuple_type = EVEX_T1S;
3499     input_size_in_bits = EVEX_32bit;
3500   }
3501   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
3502 }
3503 
3504 void Assembler::stmxcsr(Address dst) {
3505   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3506   InstructionMark im(this);
3507   prefix(dst);
3508   emit_int8(0x0F);
3509   emit_int8((unsigned char)0xAE);
3510   emit_operand(as_Register(3), dst);
3511 }
3512 
3513 void Assembler::subl(Address dst, int32_t imm32) {
3514   InstructionMark im(this);
3515   prefix(dst);
3516   emit_arith_operand(0x81, rbp, dst, imm32);
3517 }
3518 
3519 void Assembler::subl(Address dst, Register src) {
3520   InstructionMark im(this);
3521   prefix(dst, src);
3522   emit_int8(0x29);
3523   emit_operand(src, dst);
3524 }
3525 
3526 void Assembler::subl(Register dst, int32_t imm32) {
3527   prefix(dst);
3528   emit_arith(0x81, 0xE8, dst, imm32);
3529 }
3530 
3531 // Force generation of a 4-byte immediate value even if it fits into 8 bits
3532 void Assembler::subl_imm32(Register dst, int32_t imm32) {
3533   prefix(dst);
3534   emit_arith_imm32(0x81, 0xE8, dst, imm32);
3535 }
3536 
3537 void Assembler::subl(Register dst, Address src) {
3538   InstructionMark im(this);
3539   prefix(src, dst);
3540   emit_int8(0x2B);
3541   emit_operand(dst, src);
3542 }
3543 
3544 void Assembler::subl(Register dst, Register src) {
3545   (void) prefix_and_encode(dst->encoding(), src->encoding());
3546   emit_arith(0x2B, 0xC0, dst, src);
3547 }
3548 
3549 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
3550   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3551   if (VM_Version::supports_evex()) {
3552     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
3553   } else {
3554     emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
3555   }
3556 }
3557 
3558 void Assembler::subsd(XMMRegister dst, Address src) {
3559   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3560   if (VM_Version::supports_evex()) {
3561     tuple_type = EVEX_T1S;
3562     input_size_in_bits = EVEX_64bit;
3563   }
3564   emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
3565 }
3566 
3567 void Assembler::subss(XMMRegister dst, XMMRegister src) {
3568   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3569   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3570 }
3571 
3572 void Assembler::subss(XMMRegister dst, Address src) {
3573   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3574   if (VM_Version::supports_evex()) {
3575     tuple_type = EVEX_T1S;
3576     input_size_in_bits = EVEX_32bit;
3577   }
3578   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
3579 }
3580 
3581 void Assembler::testb(Register dst, int imm8) {
3582   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
3583   (void) prefix_and_encode(dst->encoding(), true);
3584   emit_arith_b(0xF6, 0xC0, dst, imm8);
3585 }
3586 
3587 void Assembler::testl(Register dst, int32_t imm32) {
3588   // not using emit_arith because test
3589   // doesn't support sign-extension of
3590   // 8bit operands
3591   int encode = dst->encoding();
3592   if (encode == 0) {
3593     emit_int8((unsigned char)0xA9);
3594   } else {
3595     encode = prefix_and_encode(encode);
3596     emit_int8((unsigned char)0xF7);
3597     emit_int8((unsigned char)(0xC0 | encode));
3598   }
3599   emit_int32(imm32);
3600 }
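
     // For illustration, testl(rax, 1) takes the short form A9 01 00 00 00,
     // while testl(rcx, 1) emits F7 C1 01 00 00 00.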
3601 
3602 void Assembler::testl(Register dst, Register src) {
3603   (void) prefix_and_encode(dst->encoding(), src->encoding());
3604   emit_arith(0x85, 0xC0, dst, src);
3605 }
3606 
3607 void Assembler::testl(Register dst, Address  src) {
3608   InstructionMark im(this);
3609   prefix(src, dst);
3610   emit_int8((unsigned char)0x85);
3611   emit_operand(dst, src);
3612 }
3613 
3614 void Assembler::tzcntl(Register dst, Register src) {
3615   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3616   emit_int8((unsigned char)0xF3);
3617   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3618   emit_int8(0x0F);
3619   emit_int8((unsigned char)0xBC);
3620   emit_int8((unsigned char)(0xC0 | encode));
3621 }
3622 
3623 void Assembler::tzcntq(Register dst, Register src) {
3624   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
3625   emit_int8((unsigned char)0xF3);
3626   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
3627   emit_int8(0x0F);
3628   emit_int8((unsigned char)0xBC);
3629   emit_int8((unsigned char)(0xC0 | encode));
3630 }
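
     // For illustration, tzcntl(rax, rbx) emits F3 0F BC C3. On pre-BMI1
     // hardware the same bytes decode as rep bsf, hence the guard above.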
3631 
3632 void Assembler::ucomisd(XMMRegister dst, Address src) {
3633   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3634   if (VM_Version::supports_evex()) {
3635     tuple_type = EVEX_T1S;
3636     input_size_in_bits = EVEX_64bit;
3637     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
3638   } else {
3639     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3640   }
3641 }
3642 
3643 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
3644   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3645   if (VM_Version::supports_evex()) {
3646     emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
3647   } else {
3648     emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
3649   }
3650 }
3651 
3652 void Assembler::ucomiss(XMMRegister dst, Address src) {
3653   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3654   if (VM_Version::supports_evex()) {
3655     tuple_type = EVEX_T1S;
3656     input_size_in_bits = EVEX_32bit;
3657   }
3658   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true);
3659 }
3660 
3661 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
3662   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3663   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true);
3664 }
3665 
3666 void Assembler::xabort(int8_t imm8) {
3667   emit_int8((unsigned char)0xC6);
3668   emit_int8((unsigned char)0xF8);
3669   emit_int8((unsigned char)(imm8 & 0xFF));
3670 }
3671 
3672 void Assembler::xaddl(Address dst, Register src) {
3673   InstructionMark im(this);
3674   prefix(dst, src);
3675   emit_int8(0x0F);
3676   emit_int8((unsigned char)0xC1);
3677   emit_operand(src, dst);
3678 }
3679 
3680 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
3681   InstructionMark im(this);
3682   relocate(rtype);
3683   if (abort.is_bound()) {
3684     address entry = target(abort);
3685     assert(entry != NULL, "abort entry NULL");
3686     intptr_t offset = entry - pc();
3687     emit_int8((unsigned char)0xC7);
3688     emit_int8((unsigned char)0xF8);
3689     emit_int32(offset - 6); // rel32 is relative to the end of this 6-byte instruction (2 opcode bytes + 4 offset bytes)
3690   } else {
3691     abort.add_patch_at(code(), locator());
3692     emit_int8((unsigned char)0xC7);
3693     emit_int8((unsigned char)0xF8);
3694     emit_int32(0);
3695   }
3696 }
3697 
3698 void Assembler::xchgl(Register dst, Address src) { // xchg
3699   InstructionMark im(this);
3700   prefix(src, dst);
3701   emit_int8((unsigned char)0x87);
3702   emit_operand(dst, src);
3703 }
3704 
3705 void Assembler::xchgl(Register dst, Register src) {
3706   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3707   emit_int8((unsigned char)0x87);
3708   emit_int8((unsigned char)(0xC0 | encode));
3709 }
3710 
3711 void Assembler::xend() {
3712   emit_int8((unsigned char)0x0F);
3713   emit_int8((unsigned char)0x01);
3714   emit_int8((unsigned char)0xD5);
3715 }
3716 
3717 void Assembler::xgetbv() {
3718   emit_int8(0x0F);
3719   emit_int8(0x01);
3720   emit_int8((unsigned char)0xD0);
3721 }
3722 
3723 void Assembler::xorl(Register dst, int32_t imm32) {
3724   prefix(dst);
3725   emit_arith(0x81, 0xF0, dst, imm32);
3726 }
3727 
3728 void Assembler::xorl(Register dst, Address src) {
3729   InstructionMark im(this);
3730   prefix(src, dst);
3731   emit_int8(0x33);
3732   emit_operand(dst, src);
3733 }
3734 
3735 void Assembler::xorl(Register dst, Register src) {
3736   (void) prefix_and_encode(dst->encoding(), src->encoding());
3737   emit_arith(0x33, 0xC0, dst, src);
3738 }
3739 
3740 
3741 // AVX 3-operand scalar floating-point arithmetic instructions
3742 
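     // For illustration, vaddsd(xmm1, xmm2, xmm3) carries the second source
     // in VEX.vvvv and emits C5 EB 58 CB on AVX hardware (the EVEX path
     // emits a 4-byte prefix instead).
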
3743 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
3744   assert(VM_Version::supports_avx(), "");
3745   if (VM_Version::supports_evex()) {
3746     tuple_type = EVEX_T1S;
3747     input_size_in_bits = EVEX_64bit;
3748     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3749   } else {
3750     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3751   }
3752 }
3753 
3754 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3755   assert(VM_Version::supports_avx(), "");
3756   if (VM_Version::supports_evex()) {
3757     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3758   } else {
3759     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3760   }
3761 }
3762 
3763 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
3764   assert(VM_Version::supports_avx(), "");
3765   if (VM_Version::supports_evex()) {
3766     tuple_type = EVEX_T1S;
3767     input_size_in_bits = EVEX_32bit;
3768   }
3769   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3770 }
3771 
3772 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3773   assert(VM_Version::supports_avx(), "");
3774   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3775 }
3776 
3777 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
3778   assert(VM_Version::supports_avx(), "");
3779   if (VM_Version::supports_evex()) {
3780     tuple_type = EVEX_T1S;
3781     input_size_in_bits = EVEX_64bit;
3782     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3783   } else {
3784     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3785   }
3786 }
3787 
3788 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3789   assert(VM_Version::supports_avx(), "");
3790   if (VM_Version::supports_evex()) {
3791     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3792   } else {
3793     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3794   }
3795 }
3796 
3797 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
3798   assert(VM_Version::supports_avx(), "");
3799   if (VM_Version::supports_evex()) {
3800     tuple_type = EVEX_T1S;
3801     input_size_in_bits = EVEX_32bit;
3802   }
3803   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3804 }
3805 
3806 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3807   assert(VM_Version::supports_avx(), "");
3808   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3809 }
3810 
3811 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
3812   assert(VM_Version::supports_avx(), "");
3813   if (VM_Version::supports_evex()) {
3814     tuple_type = EVEX_T1S;
3815     input_size_in_bits = EVEX_64bit;
3816     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3817   } else {
3818     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3819   }
3820 }
3821 
3822 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3823   assert(VM_Version::supports_avx(), "");
3824   if (VM_Version::supports_evex()) {
3825     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3826   } else {
3827     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3828   }
3829 }
3830 
3831 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
3832   assert(VM_Version::supports_avx(), "");
3833   if (VM_Version::supports_evex()) {
3834     tuple_type = EVEX_T1S;
3835     input_size_in_bits = EVEX_32bit;
3836   }
3837   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3838 }
3839 
3840 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3841   assert(VM_Version::supports_avx(), "");
3842   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3843 }
3844 
3845 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
3846   assert(VM_Version::supports_avx(), "");
3847   if (VM_Version::supports_evex()) {
3848     tuple_type = EVEX_T1S;
3849     input_size_in_bits = EVEX_64bit;
3850     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3851   } else {
3852     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3853   }
3854 }
3855 
3856 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3857   assert(VM_Version::supports_avx(), "");
3858   if (VM_Version::supports_evex()) {
3859     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3860   } else {
3861     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
3862   }
3863 }
3864 
3865 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
3866   assert(VM_Version::supports_avx(), "");
3867   if (VM_Version::supports_evex()) {
3868     tuple_type = EVEX_T1S;
3869     input_size_in_bits = EVEX_32bit;
3870   }
3871   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3872 }
3873 
3874 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3875   assert(VM_Version::supports_avx(), "");
3876   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
3877 }
3878 
3879 //====================VECTOR ARITHMETIC=====================================
3880 
3881 // Floating-point vector arithmetic
3882 
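     // For illustration, vaddpd(ymm0, ymm1, ymm2, AVX_256bit) emits
     // C5 F5 58 C2 on AVX hardware; VEX.L = 1 selects the 256-bit form.
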
3883 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
3884   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3885   if (VM_Version::supports_evex()) {
3886     emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66);
3887   } else {
3888     emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
3889   }
3890 }
3891 
3892 void Assembler::addps(XMMRegister dst, XMMRegister src) {
3893   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3894   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
3895 }
3896 
3897 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3898   assert(VM_Version::supports_avx(), "");
3899   if (VM_Version::supports_evex()) {
3900     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3901   } else {
3902     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3903   }
3904 }
3905 
3906 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3907   assert(VM_Version::supports_avx(), "");
3908   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
3909 }
3910 
3911 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3912   assert(VM_Version::supports_avx(), "");
3913   if (VM_Version::supports_evex()) {
3914     tuple_type = EVEX_FV;
3915     input_size_in_bits = EVEX_64bit;
3916     emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3917   } else {
3918     emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
3919   }
3920 }
3921 
3922 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3923   assert(VM_Version::supports_avx(), "");
3924   if (VM_Version::supports_evex()) {
3925     tuple_type = EVEX_FV;
3926     input_size_in_bits = EVEX_32bit;
3927   }
3928   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
3929 }
3930 
3931 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
3932   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3933   if (VM_Version::supports_evex()) {
3934     emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66);
3935   } else {
3936     emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
3937   }
3938 }
3939 
3940 void Assembler::subps(XMMRegister dst, XMMRegister src) {
3941   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3942   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
3943 }
3944 
3945 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3946   assert(VM_Version::supports_avx(), "");
3947   if (VM_Version::supports_evex()) {
3948     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3949   } else {
3950     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3951   }
3952 }
3953 
3954 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3955   assert(VM_Version::supports_avx(), "");
3956   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
3957 }
3958 
3959 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3960   assert(VM_Version::supports_avx(), "");
3961   if (VM_Version::supports_evex()) {
3962     tuple_type = EVEX_FV;
3963     input_size_in_bits = EVEX_64bit;
3964     emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3965   } else {
3966     emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
3967   }
3968 }
3969 
3970 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3971   assert(VM_Version::supports_avx(), "");
3972   if (VM_Version::supports_evex()) {
3973     tuple_type = EVEX_FV;
3974     input_size_in_bits = EVEX_32bit;
3975   }
3976   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
3977 }
3978 
3979 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
3980   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3981   if (VM_Version::supports_evex()) {
3982     emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
3983   } else {
3984     emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
3985   }
3986 }
3987 
3988 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
3989   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3990   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
3991 }
3992 
3993 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3994   assert(VM_Version::supports_avx(), "");
3995   if (VM_Version::supports_evex()) {
3996     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
3997   } else {
3998     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
3999   }
4000 }
4001 
4002 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4003   assert(VM_Version::supports_avx(), "");
4004   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
4005 }
4006 
4007 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4008   assert(VM_Version::supports_avx(), "");
4009   if (VM_Version::supports_evex()) {
4010     tuple_type = EVEX_FV;
4011     input_size_in_bits = EVEX_64bit;
4012     emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4013   } else {
4014     emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
4015   }
4016 }
4017 
4018 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4019   assert(VM_Version::supports_avx(), "");
4020   if (VM_Version::supports_evex()) {
4021     tuple_type = EVEX_FV;
4022     input_size_in_bits = EVEX_32bit;
4023   }
4024   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
4025 }
4026 
4027 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
4028   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4029   if (VM_Version::supports_evex()) {
4030     emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66);
4031   } else {
4032     emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
4033   }
4034 }
4035 
4036 void Assembler::divps(XMMRegister dst, XMMRegister src) {
4037   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4038   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
4039 }
4040 
4041 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4042   assert(VM_Version::supports_avx(), "");
4043   if (VM_Version::supports_evex()) {
4044     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4045   } else {
4046     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4047   }
4048 }
4049 
4050 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4051   assert(VM_Version::supports_avx(), "");
4052   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
4053 }
4054 
4055 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4056   assert(VM_Version::supports_avx(), "");
4057   if (VM_Version::supports_evex()) {
4058     tuple_type = EVEX_FV;
4059     input_size_in_bits = EVEX_64bit;
4060     emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4061   } else {
4062     emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
4063   }
4064 }
4065 
4066 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4067   assert(VM_Version::supports_avx(), "");
4068   if (VM_Version::supports_evex()) {
4069     tuple_type = EVEX_FV;
4070     input_size_in_bits = EVEX_32bit;
4071   }
4072   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
4073 }
4074 
4075 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
4076   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4077   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4078     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
4079   } else {
4080     emit_simd_arith(0x54, dst, src, VEX_SIMD_66, false, true);
4081   }
4082 }
4083 
4084 void Assembler::andps(XMMRegister dst, XMMRegister src) {
4085   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4086   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, false,
4087                   (VM_Version::supports_avx512dq() == false));
4088 }
4089 
4090 void Assembler::andps(XMMRegister dst, Address src) {
4091   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4092   if (VM_Version::supports_evex()) {
4093     tuple_type = EVEX_FV;
4094     input_size_in_bits = EVEX_32bit;
4095   }
4096   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE,
4097                   false, (VM_Version::supports_avx512dq() == false));
4098 }
4099 
4100 void Assembler::andpd(XMMRegister dst, Address src) {
4101   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4102   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4103     tuple_type = EVEX_FV;
4104     input_size_in_bits = EVEX_64bit;
4105     emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
4106   } else {
4107     emit_simd_arith(0x54, dst, src, VEX_SIMD_66, false, true);
4108   }
4109 }
4110 
4111 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4112   assert(VM_Version::supports_avx(), "");
4113   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4114     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
4115   } else {
4116     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, true);
4117   }
4118 }
4119 
4120 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4121   assert(VM_Version::supports_avx(), "");
4122   bool legacy_mode = (VM_Version::supports_avx512dq() == false);
4123   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, legacy_mode);
4124 }
4125 
4126 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4127   assert(VM_Version::supports_avx(), "");
4128   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4129     tuple_type = EVEX_FV;
4130     input_size_in_bits = EVEX_64bit;
4131     emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
4132   } else {
4133     emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, true);
4134   }
4135 }
4136 
4137 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4138   assert(VM_Version::supports_avx(), "");
4139   if (VM_Version::supports_evex()) {
4140     tuple_type = EVEX_FV;
4141     input_size_in_bits = EVEX_32bit;
4142   }
4143   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len,
4144                  (VM_Version::supports_avx512dq() == false));
4145 }
4146 
4147 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
4148   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4149   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4150     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
4151   } else {
4152     emit_simd_arith(0x57, dst, src, VEX_SIMD_66, false, true);
4153   }
4154 }
4155 
4156 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
4157   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4158   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE,
4159                   false, (VM_Version::supports_avx512dq() == false));
4160 }
4161 
4162 void Assembler::xorpd(XMMRegister dst, Address src) {
4163   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4164   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4165     tuple_type = EVEX_FV;
4166     input_size_in_bits = EVEX_64bit;
4167     emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
4168   } else {
4169     emit_simd_arith(0x57, dst, src, VEX_SIMD_66, false, true);
4170   }
4171 }
4172 
4173 void Assembler::xorps(XMMRegister dst, Address src) {
4174   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4175   if (VM_Version::supports_evex()) {
4176     tuple_type = EVEX_FV;
4177     input_size_in_bits = EVEX_32bit;
4178   }
4179   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, false,
4180                   (VM_Version::supports_avx512dq() == false));
4181 }
4182 
4183 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4184   assert(VM_Version::supports_avx(), "");
4185   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4186     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
4187   } else {
4188     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, true);
4189   }
4190 }
4191 
4192 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4193   assert(VM_Version::supports_avx(), "");
4194   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len,
4195                  (VM_Version::supports_avx512dq() == false));
4196 }
4197 
4198 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4199   assert(VM_Version::supports_avx(), "");
4200   if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
4201     tuple_type = EVEX_FV;
4202     input_size_in_bits = EVEX_64bit;
4203     emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
4204   } else {
4205     emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, true);
4206   }
4207 }
4208 
4209 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4210   assert(VM_Version::supports_avx(), "");
4211   if (VM_Version::supports_evex()) {
4212     tuple_type = EVEX_FV;
4213     input_size_in_bits = EVEX_32bit;
4214   }
4215   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len,
4216                  (VM_Version::supports_avx512dq() == false));
4217 }
4218 
4219 // Integer vector arithmetic
4220 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4221   assert((VM_Version::supports_avx() && (vector_len == 0)) ||
4222          VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
4223   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len,
4224                                      VEX_OPCODE_0F_38, true, false);
4225   emit_int8(0x01);
4226   emit_int8((unsigned char)(0xC0 | encode));
4227 }
4228 
4229 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4230   assert((VM_Version::supports_avx() && (vector_len == 0)) ||
4231          VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
4232   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len,
4233                                      VEX_OPCODE_0F_38, true, false);
4234   emit_int8(0x02);
4235   emit_int8((unsigned char)(0xC0 | encode));
4236 }
4237 
4238 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
4239   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4240   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
4241 }
4242 
4243 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
4244   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4245   emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
4246 }
4247 
4248 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
4249   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4250   emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
4251 }
4252 
4253 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
4254   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4255   if (VM_Version::supports_evex()) {
4256     emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66);
4257   } else {
4258     emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
4259   }
4260 }
4261 
4262 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
4263   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4264   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
4265                                       VEX_OPCODE_0F_38, false, AVX_128bit, true);
4266   emit_int8(0x01);
4267   emit_int8((unsigned char)(0xC0 | encode));
4268 }
4269 
4270 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
4271   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
4272   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
4273                                       VEX_OPCODE_0F_38, false, AVX_128bit, true);
4274   emit_int8(0x02);
4275   emit_int8((unsigned char)(0xC0 | encode));
4276 }
4277 
4278 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4279   assert(UseAVX > 0, "requires some form of AVX");
4280   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len,
4281                  (VM_Version::supports_avx512bw() == false));
4282 }
4283 
4284 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4285   assert(UseAVX > 0, "requires some form of AVX");
4286   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len,
4287                  (VM_Version::supports_avx512bw() == false));
4288 }
4289 
4290 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4291   assert(UseAVX > 0, "requires some form of AVX");
4292   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4293 }
4294 
4295 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4296   assert(UseAVX > 0, "requires some form of AVX");
4297   if (VM_Version::supports_evex()) {
4298     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4299   } else {
4300     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4301   }
4302 }
4303 
4304 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4305   assert(UseAVX > 0, "requires some form of AVX");
4306   if (VM_Version::supports_evex()) {
4307     tuple_type = EVEX_FVM;
4308   }
4309   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len);
4310 }
4311 
4312 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4313   assert(UseAVX > 0, "requires some form of AVX");
4314   if (VM_Version::supports_evex()) {
4315     tuple_type = EVEX_FVM;
4316   }
4317   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len);
4318 }
4319 
4320 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4321   assert(UseAVX > 0, "requires some form of AVX");
4322   if (VM_Version::supports_evex()) {
4323     tuple_type = EVEX_FV;
4324     input_size_in_bits = EVEX_32bit;
4325   }
4326   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
4327 }
4328 
4329 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4330   assert(UseAVX > 0, "requires some form of AVX");
4331   if (VM_Version::supports_evex()) {
4332     tuple_type = EVEX_FV;
4333     input_size_in_bits = EVEX_64bit;
4334     emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4335   } else {
4336     emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
4337   }
4338 }
4339 
4340 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
4341   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4342   emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
4343 }
4344 
4345 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
4346   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4347   emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
4348 }
4349 
4350 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
4351   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4352   emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
4353 }
4354 
4355 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
4356   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4357   if (VM_Version::supports_evex()) {
4358     emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66);
4359   } else {
4360     emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
4361   }
4362 }
4363 
4364 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4365   assert(UseAVX > 0, "requires some form of AVX");
4366   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len,
4367                  (VM_Version::supports_avx512bw() == false));
4368 }
4369 
4370 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4371   assert(UseAVX > 0, "requires some form of AVX");
4372   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len,
4373                  (VM_Version::supports_avx512bw() == false));
4374 }
4375 
4376 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4377   assert(UseAVX > 0, "requires some form of AVX");
4378   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4379 }
4380 
4381 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4382   assert(UseAVX > 0, "requires some form of AVX");
4383   if (VM_Version::supports_evex()) {
4384     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4385   } else {
4386     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4387   }
4388 }
4389 
4390 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4391   assert(UseAVX > 0, "requires some form of AVX");
4392   if (VM_Version::supports_evex()) {
4393     tuple_type = EVEX_FVM;
4394   }
4395   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len,
4396                  (VM_Version::supports_avx512bw() == false));
4397 }
4398 
4399 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4400   assert(UseAVX > 0, "requires some form of AVX");
4401   if (VM_Version::supports_evex()) {
4402     tuple_type = EVEX_FVM;
4403   }
4404   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len,
4405                  (VM_Version::supports_avx512bw() == false));
4406 }
4407 
4408 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4409   assert(UseAVX > 0, "requires some form of AVX");
4410   if (VM_Version::supports_evex()) {
4411     tuple_type = EVEX_FV;
4412     input_size_in_bits = EVEX_32bit;
4413   }
4414   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
4415 }
4416 
4417 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4418   assert(UseAVX > 0, "requires some form of AVX");
4419   if (VM_Version::supports_evex()) {
4420     tuple_type = EVEX_FV;
4421     input_size_in_bits = EVEX_64bit;
4422     emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4423   } else {
4424     emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
4425   }
4426 }
4427 
4428 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
4429   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4430   emit_simd_arith(0xD5, dst, src, VEX_SIMD_66,
4431                   (VM_Version::supports_avx512bw() == false));
4432 }
4433 
4434 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
4435   assert(VM_Version::supports_sse4_1(), "");
4436   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66,
4437                                       false, VEX_OPCODE_0F_38);
4438   emit_int8(0x40);
4439   emit_int8((unsigned char)(0xC0 | encode));
4440 }
4441 
4442 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4443   assert(UseAVX > 0, "requires some form of AVX");
4444   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len,
4445                  (VM_Version::supports_avx512bw() == false));
4446 }
4447 
4448 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4449   assert(UseAVX > 0, "requires some form of AVX");
4450   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66,
4451                                      vector_len, VEX_OPCODE_0F_38);
4452   emit_int8(0x40);
4453   emit_int8((unsigned char)(0xC0 | encode));
4454 }
4455 
4456 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4457   assert(UseAVX > 2, "requires some form of AVX-512");
4458   int src_enc = src->encoding();
4459   int dst_enc = dst->encoding();
4460   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4461   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66,
4462                                      VEX_OPCODE_0F_38, true, vector_len, false, false);
4463   emit_int8(0x40);
4464   emit_int8((unsigned char)(0xC0 | encode));
4465 }
4466 
4467 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4468   assert(UseAVX > 0, "requires some form of AVX");
4469   if (VM_Version::supports_evex()) {
4470     tuple_type = EVEX_FVM;
4471   }
4472   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len);
4473 }
4474 
4475 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4476   assert(UseAVX > 0, "requires some form of AVX");
4477   if (VM_Version::supports_evex()) {
4478     tuple_type = EVEX_FV;
4479     input_size_in_bits = EVEX_32bit;
4480   }
4481   InstructionMark im(this);
4482   int dst_enc = dst->encoding();
4483   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4484   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
4485              VEX_OPCODE_0F_38, false, vector_len);
4486   emit_int8(0x40);
4487   emit_operand(dst, src);
4488 }
4489 
4490 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4491   assert(UseAVX > 0, "requires some form of AVX");
4492   if (VM_Version::supports_evex()) {
4493     tuple_type = EVEX_FV;
4494     input_size_in_bits = EVEX_64bit;
4495   }
4496   InstructionMark im(this);
4497   int dst_enc = dst->encoding();
4498   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4499   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len);
4500   emit_int8(0x40);
4501   emit_operand(dst, src);
4502 }
4503 
4504 // Shift packed integers left by specified number of bits.
4505 void Assembler::psllw(XMMRegister dst, int shift) {
4506   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4507   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4508   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
4509                                       false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
4510   emit_int8(0x71);
4511   emit_int8((unsigned char)(0xC0 | encode));
4512   emit_int8(shift & 0xFF);
4513 }
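
     // For illustration, psllw(xmm1, 3) emits 66 0F 71 F1 03 on plain SSE
     // hardware (ModRM = 0xC0 | 6 << 3 | 1 selects the /6 form).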
4514 
4515 void Assembler::pslld(XMMRegister dst, int shift) {
4516   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4517   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4518   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false);
4519   emit_int8(0x72);
4520   emit_int8((unsigned char)(0xC0 | encode));
4521   emit_int8(shift & 0xFF);
4522 }
4523 
4524 void Assembler::psllq(XMMRegister dst, int shift) {
4525   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4526   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4527   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true);
4528   emit_int8(0x73);
4529   emit_int8((unsigned char)(0xC0 | encode));
4530   emit_int8(shift & 0xFF);
4531 }
4532 
4533 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
4534   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4535   emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, false,
4536                   (VM_Version::supports_avx512bw() == false));
4537 }
4538 
4539 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
4540   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4541   emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
4542 }
4543 
4544 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
4545   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4546   if (VM_Version::supports_evex()) {
4547     emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66);
4548   } else {
4549     emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
4550   }
4551 }
4552 
4553 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4554   assert(UseAVX > 0, "requires some form of AVX");
4555   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
4556   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len,
4557                  (VM_Version::supports_avx512bw() == false));
4558   emit_int8(shift & 0xFF);
4559 }
4560 
4561 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4562   assert(UseAVX > 0, "requires some form of AVX");
4563   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
4564   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len);
4565   emit_int8(shift & 0xFF);
4566 }
4567 
4568 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4569   assert(UseAVX > 0, "requires some form of AVX");
4570   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
4571   if (VM_Version::supports_evex()) {
4572     emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4573   } else {
4574     emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
4575   }
4576   emit_int8(shift & 0xFF);
4577 }
4578 
4579 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4580   assert(UseAVX > 0, "requires some form of AVX");
4581   emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len,
4582                  (VM_Version::supports_avx512bw() == false));
4583 }
4584 
4585 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4586   assert(UseAVX > 0, "requires some form of AVX");
4587   emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len);
4588 }
4589 
4590 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4591   assert(UseAVX > 0, "requires some form of AVX");
4592   if (VM_Version::supports_evex()) {
4593     emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4594   } else {
4595     emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
4596   }
4597 }
4598 
4599 // Shift packed integers logically right by specified number of bits.
4600 void Assembler::psrlw(XMMRegister dst, int shift) {
4601   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4602   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4603   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
4604                                       (VM_Version::supports_avx512bw() == false));
4605   emit_int8(0x71);
4606   emit_int8((unsigned char)(0xC0 | encode));
4607   emit_int8(shift & 0xFF);
4608 }
4609 
4610 void Assembler::psrld(XMMRegister dst, int shift) {
4611   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4612   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4613   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false);
4614   emit_int8(0x72);
4615   emit_int8((unsigned char)(0xC0 | encode));
4616   emit_int8(shift & 0xFF);
4617 }
4618 
4619 void Assembler::psrlq(XMMRegister dst, int shift) {
4620   // Do not confuse it with psrldq SSE2 instruction which
4621   // shifts 128 bit value in xmm register by number of bytes.
4622   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4623   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4624   int encode = 0;
4625   if (VM_Version::supports_evex() && VM_Version::supports_avx512bw()) {
4626     encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false);
4627   } else {
4628     encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true);
4629   }
4630   emit_int8(0x73);
4631   emit_int8((unsigned char)(0xC0 | encode));
4632   emit_int8(shift & 0xFF);
4633 }
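
     // For illustration, psrlq(xmm0, 4) emits 66 0F 73 D0 04 on plain SSE
     // hardware; the EVEX/VEX paths above differ only in the prefix.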
4634 
4635 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
4636   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4637   emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, false,
4638                   (VM_Version::supports_avx512bw() == false));
4639 }
4640 
4641 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
4642   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4643   emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
4644 }
4645 
4646 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
4647   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4648   if (VM_Version::supports_evex()) {
4649     emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
4650   } else {
4651     emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
4652   }
4653 }
4654 
4655 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4656   assert(UseAVX > 0, "requires some form of AVX");
4657   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
4658   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len,
4659                  (VM_Version::supports_avx512bw() == false));
4660   emit_int8(shift & 0xFF);
4661 }
4662 
4663 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4664   assert(UseAVX > 0, "requires some form of AVX");
4665   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
4666   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
4667   emit_int8(shift & 0xFF);
4668 }
4669 
4670 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4671   assert(UseAVX > 0, "requires some form of AVX");
4672   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
4673   if (VM_Version::supports_evex()) {
4674     emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4675   } else {
4676     emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
4677   }
4678   emit_int8(shift & 0xFF);
4679 }
4680 
4681 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4682   assert(UseAVX > 0, "requires some form of AVX");
4683   emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len,
4684                  (VM_Version::supports_avx512bw() == false));
4685 }
4686 
4687 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4688   assert(UseAVX > 0, "requires some form of AVX");
4689   emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
4690 }
4691 
4692 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4693   assert(UseAVX > 0, "requires some form of AVX");
4694   if (VM_Version::supports_evex()) {
4695     emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4696   } else {
4697     emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
4698   }
4699 }
4700 
4701 // Shift packed integers arithmetically right by specified number of bits.
4702 void Assembler::psraw(XMMRegister dst, int shift) {
4703   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4704   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4705   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
4706                                       (VM_Version::supports_avx512bw() == false));
4707   emit_int8(0x71);
4708   emit_int8((unsigned char)(0xC0 | encode));
4709   emit_int8(shift & 0xFF);
4710 }
4711 
4712 void Assembler::psrad(XMMRegister dst, int shift) {
4713   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4714   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4715   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false);
4716   emit_int8(0x72);
4717   emit_int8((unsigned char)(0xC0 | encode));
4718   emit_int8(shift & 0xFF);
4719 }
4720 
4721 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
4722   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4723   emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66,
4724                   (VM_Version::supports_avx512bw() == false));
4725 }
4726 
4727 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
4728   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4729   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
4730 }
4731 
4732 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4733   assert(UseAVX > 0, "requires some form of AVX");
4734   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
4735   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len,
4736                  (VM_Version::supports_avx512bw() == false));
4737   emit_int8(shift & 0xFF);
4738 }
4739 
4740 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
4741   assert(UseAVX > 0, "requires some form of AVX");
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
4743   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
4744   emit_int8(shift & 0xFF);
4745 }
4746 
4747 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4748   assert(UseAVX > 0, "requires some form of AVX");
4749   emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len,
4750                  (VM_Version::supports_avx512bw() == false));
4751 }
4752 
4753 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
4754   assert(UseAVX > 0, "requires some form of AVX");
4755   emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len);
4756 }
4757 
4758 
4759 // AND packed integers
4760 void Assembler::pand(XMMRegister dst, XMMRegister src) {
4761   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4762   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
4763 }
4764 
4765 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4766   assert(UseAVX > 0, "requires some form of AVX");
4767   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
4768 }
4769 
4770 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4771   assert(UseAVX > 0, "requires some form of AVX");
4772   if (VM_Version::supports_evex()) {
4773     tuple_type = EVEX_FV;
4774     input_size_in_bits = EVEX_32bit;
4775   }
4776   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
4777 }
4778 
4779 void Assembler::por(XMMRegister dst, XMMRegister src) {
4780   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4781   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
4782 }
4783 
4784 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4785   assert(UseAVX > 0, "requires some form of AVX");
4786   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
4787 }
4788 
4789 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4790   assert(UseAVX > 0, "requires some form of AVX");
4791   if (VM_Version::supports_evex()) {
4792     tuple_type = EVEX_FV;
4793     input_size_in_bits = EVEX_32bit;
4794   }
4795   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
4796 }
4797 
4798 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
4799   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4800   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
4801 }
4802 
4803 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4804   assert(UseAVX > 0, "requires some form of AVX");
4805   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
4806 }
4807 
4808 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
4809   assert(UseAVX > 0, "requires some form of AVX");
4810   if (VM_Version::supports_evex()) {
4811     tuple_type = EVEX_FV;
4812     input_size_in_bits = EVEX_32bit;
4813   }
4814   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
4815 }
4816 
4817 
4818 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4819   assert(VM_Version::supports_avx(), "");
4820   int vector_len = AVX_256bit;
4821   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4822   emit_int8(0x18);
4823   emit_int8((unsigned char)(0xC0 | encode));
4824   // 0x00 - insert into lower 128 bits
4825   // 0x01 - insert into upper 128 bits
4826   emit_int8(0x01);
4827 }
4828 
4829 void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4830   assert(VM_Version::supports_evex(), "");
4831   int vector_len = AVX_512bit;
4832   int src_enc = src->encoding();
4833   int dst_enc = dst->encoding();
4834   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4835   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66,
4836                                      VEX_OPCODE_0F_3A, true, vector_len, false, false);
4837   emit_int8(0x1A);
4838   emit_int8((unsigned char)(0xC0 | encode));
4839   // 0x00 - insert into lower 256 bits
4840   // 0x01 - insert into upper 256 bits
4841   emit_int8(0x01);
4842 }
4843 
4844 void Assembler::vinsertf64x4h(XMMRegister dst, Address src) {
  assert(VM_Version::supports_evex(), "");
4846   if (VM_Version::supports_evex()) {
4847     tuple_type = EVEX_T4;
4848     input_size_in_bits = EVEX_64bit;
4849   }
4850   InstructionMark im(this);
4851   int vector_len = AVX_512bit;
4852   assert(dst != xnoreg, "sanity");
4853   int dst_enc = dst->encoding();
4854   // swap src<->dst for encoding
4855   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector_len);
4856   emit_int8(0x1A);
4857   emit_operand(dst, src);
  // 0x01 - insert into upper 256 bits
4859   emit_int8(0x01);
4860 }
4861 
4862 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
4863   assert(VM_Version::supports_avx(), "");
4864   if (VM_Version::supports_evex()) {
4865     tuple_type = EVEX_T4;
4866     input_size_in_bits = EVEX_32bit;
4867   }
4868   InstructionMark im(this);
4869   int vector_len = AVX_256bit;
4870   assert(dst != xnoreg, "sanity");
4871   int dst_enc = dst->encoding();
4872   // swap src<->dst for encoding
4873   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4874   emit_int8(0x18);
4875   emit_operand(dst, src);
4876   // 0x01 - insert into upper 128 bits
4877   emit_int8(0x01);
4878 }
4879 
4880 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
4881   assert(VM_Version::supports_avx(), "");
4882   int vector_len = AVX_256bit;
4883   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4884   emit_int8(0x19);
4885   emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from lower 128 bits
  // 0x01 - extract from upper 128 bits
4888   emit_int8(0x01);
4889 }
4890 
4891 void Assembler::vextractf128h(Address dst, XMMRegister src) {
4892   assert(VM_Version::supports_avx(), "");
4893   if (VM_Version::supports_evex()) {
4894     tuple_type = EVEX_T4;
4895     input_size_in_bits = EVEX_32bit;
4896   }
4897   InstructionMark im(this);
4898   int vector_len = AVX_256bit;
4899   assert(src != xnoreg, "sanity");
4900   int src_enc = src->encoding();
4901   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4902   emit_int8(0x19);
4903   emit_operand(src, dst);
4904   // 0x01 - extract from upper 128 bits
4905   emit_int8(0x01);
4906 }
4907 
4908 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4909   assert(VM_Version::supports_avx2(), "");
4910   int vector_len = AVX_256bit;
4911   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4912   emit_int8(0x38);
4913   emit_int8((unsigned char)(0xC0 | encode));
4914   // 0x00 - insert into lower 128 bits
4915   // 0x01 - insert into upper 128 bits
4916   emit_int8(0x01);
4917 }
4918 
4919 void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4920   assert(VM_Version::supports_evex(), "");
4921   int vector_len = AVX_512bit;
4922   int src_enc = src->encoding();
4923   int dst_enc = dst->encoding();
4924   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4925   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
4926                                      VM_Version::supports_avx512dq(), vector_len, false, false);
4927   emit_int8(0x38);
4928   emit_int8((unsigned char)(0xC0 | encode));
4929   // 0x00 - insert into lower 256 bits
4930   // 0x01 - insert into upper 256 bits
4931   emit_int8(0x01);
4932 }
4933 
4934 void Assembler::vinserti128h(XMMRegister dst, Address src) {
4935   assert(VM_Version::supports_avx2(), "");
4936   if (VM_Version::supports_evex()) {
4937     tuple_type = EVEX_T4;
4938     input_size_in_bits = EVEX_32bit;
4939   }
4940   InstructionMark im(this);
4941   int vector_len = AVX_256bit;
4942   assert(dst != xnoreg, "sanity");
4943   int dst_enc = dst->encoding();
4944   // swap src<->dst for encoding
4945   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4946   emit_int8(0x38);
4947   emit_operand(dst, src);
4948   // 0x01 - insert into upper 128 bits
4949   emit_int8(0x01);
4950 }
4951 
4952 void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
4954   int vector_len = AVX_256bit;
4955   int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
4956   emit_int8(0x39);
4957   emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from lower 128 bits
  // 0x01 - extract from upper 128 bits
4960   emit_int8(0x01);
4961 }
4962 
4963 void Assembler::vextracti128h(Address dst, XMMRegister src) {
4964   assert(VM_Version::supports_avx2(), "");
4965   if (VM_Version::supports_evex()) {
4966     tuple_type = EVEX_T4;
4967     input_size_in_bits = EVEX_32bit;
4968   }
4969   InstructionMark im(this);
4970   int vector_len = AVX_256bit;
4971   assert(src != xnoreg, "sanity");
4972   int src_enc = src->encoding();
4973   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
4974   emit_int8(0x39);
4975   emit_operand(src, dst);
4976   // 0x01 - extract from upper 128 bits
4977   emit_int8(0x01);
4978 }
4979 
4980 void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) {
4981   assert(VM_Version::supports_evex(), "");
4982   int vector_len = AVX_512bit;
4983   int src_enc = src->encoding();
4984   int dst_enc = dst->encoding();
4985   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
4986                                      true, vector_len, false, false);
4987   emit_int8(0x3B);
4988   emit_int8((unsigned char)(0xC0 | encode));
4989   // 0x01 - extract from upper 256 bits
4990   emit_int8(0x01);
4991 }
4992 
4993 void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
4994   assert(VM_Version::supports_evex(), "");
4995   int vector_len = AVX_512bit;
4996   int src_enc = src->encoding();
4997   int dst_enc = dst->encoding();
4998   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
4999                                      VM_Version::supports_avx512dq(), vector_len, false, false);
5000   emit_int8(0x39);
5001   emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
5005   emit_int8(value & 0x3);
5006 }
5007 
5008 void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) {
5009   assert(VM_Version::supports_evex(), "");
5010   int vector_len = AVX_512bit;
5011   int src_enc = src->encoding();
5012   int dst_enc = dst->encoding();
5013   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5014                                      VM_Version::supports_avx512dq(), vector_len, false, false);
5015   emit_int8(0x1B);
5016   emit_int8((unsigned char)(0xC0 | encode));
5017   // 0x01 - extract from upper 256 bits
5018   emit_int8(0x01);
5019 }
5020 
5021 void Assembler::vextractf64x4h(Address dst, XMMRegister src) {
  assert(VM_Version::supports_evex(), "");
5023   tuple_type = EVEX_T4;
5024   input_size_in_bits = EVEX_64bit;
5025   InstructionMark im(this);
5026   int vector_len = AVX_512bit;
5027   assert(src != xnoreg, "sanity");
5028   int src_enc = src->encoding();
5029   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5030              VM_Version::supports_avx512dq(), vector_len);
5031   emit_int8(0x1B);
5032   emit_operand(src, dst);
  // 0x01 - extract from upper 256 bits
5034   emit_int8(0x01);
5035 }
5036 
5037 void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
5038   assert(VM_Version::supports_evex(), "");
5039   int vector_len = AVX_512bit;
5040   int src_enc = src->encoding();
5041   int dst_enc = dst->encoding();
5042   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66,
5043                                      VEX_OPCODE_0F_3A, false, vector_len, false, false);
5044   emit_int8(0x19);
5045   emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
5049   emit_int8(value & 0x3);
5050 }
5051 
5052 void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
5053   assert(VM_Version::supports_evex(), "");
5054   int vector_len = AVX_512bit;
5055   int src_enc = src->encoding();
5056   int dst_enc = dst->encoding();
5057   int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
5058                                      VM_Version::supports_avx512dq(), vector_len, false, false);
5059   emit_int8(0x19);
5060   emit_int8((unsigned char)(0xC0 | encode));
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
5064   emit_int8(value & 0x3);
5065 }
5066 
// duplicate 4-byte integer data from src into 8 locations in dest
5068 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
5069   assert(VM_Version::supports_avx2(), "");
5070   int vector_len = AVX_256bit;
5071   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
5072                                      vector_len, VEX_OPCODE_0F_38, false);
5073   emit_int8(0x58);
5074   emit_int8((unsigned char)(0xC0 | encode));
5075 }
5076 
// duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL
5078 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
5079   assert(VM_Version::supports_evex(), "");
5080   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
5081                                      vector_len, VEX_OPCODE_0F_38, false);
5082   emit_int8(0x78);
5083   emit_int8((unsigned char)(0xC0 | encode));
5084 }
5085 
5086 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
5087   assert(VM_Version::supports_evex(), "");
5088   tuple_type = EVEX_T1S;
5089   input_size_in_bits = EVEX_8bit;
5090   InstructionMark im(this);
5091   assert(dst != xnoreg, "sanity");
5092   int dst_enc = dst->encoding();
5093   // swap src<->dst for encoding
5094   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
5095   emit_int8(0x78);
5096   emit_operand(dst, src);
5097 }
5098 
// duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL
5100 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
5101   assert(VM_Version::supports_evex(), "");
5102   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
5103                                      vector_len, VEX_OPCODE_0F_38, false);
5104   emit_int8(0x79);
5105   emit_int8((unsigned char)(0xC0 | encode));
5106 }
5107 
5108 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
5109   assert(VM_Version::supports_evex(), "");
5110   tuple_type = EVEX_T1S;
5111   input_size_in_bits = EVEX_16bit;
5112   InstructionMark im(this);
5113   assert(dst != xnoreg, "sanity");
5114   int dst_enc = dst->encoding();
5115   // swap src<->dst for encoding
5116   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
5117   emit_int8(0x79);
5118   emit_operand(dst, src);
5119 }
5120 
5121 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
5122 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
5123   assert(VM_Version::supports_evex(), "");
5124   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
5125                                      vector_len, VEX_OPCODE_0F_38, false);
5126   emit_int8(0x58);
5127   emit_int8((unsigned char)(0xC0 | encode));
5128 }
5129 
5130 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
5131   assert(VM_Version::supports_evex(), "");
5132   tuple_type = EVEX_T1S;
5133   input_size_in_bits = EVEX_32bit;
5134   InstructionMark im(this);
5135   assert(dst != xnoreg, "sanity");
5136   int dst_enc = dst->encoding();
5137   // swap src<->dst for encoding
5138   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
5139   emit_int8(0x58);
5140   emit_operand(dst, src);
5141 }
5142 
// duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL
5144 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
5145   assert(VM_Version::supports_evex(), "");
5146   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
5147                                      VEX_OPCODE_0F_38, true, vector_len, false, false);
5148   emit_int8(0x59);
5149   emit_int8((unsigned char)(0xC0 | encode));
5150 }
5151 
5152 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
5153   assert(VM_Version::supports_evex(), "");
5154   tuple_type = EVEX_T1S;
5155   input_size_in_bits = EVEX_64bit;
5156   InstructionMark im(this);
5157   assert(dst != xnoreg, "sanity");
5158   int dst_enc = dst->encoding();
5159   // swap src<->dst for encoding
5160   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len);
5161   emit_int8(0x59);
5162   emit_operand(dst, src);
5163 }
5164 
5165 // duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL
5166 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
5167   assert(VM_Version::supports_evex(), "");
5168   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
5169                                      VEX_OPCODE_0F_38, false, vector_len, false, false);
5170   emit_int8(0x18);
5171   emit_int8((unsigned char)(0xC0 | encode));
5172 }
5173 
5174 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
5175   assert(VM_Version::supports_evex(), "");
5176   tuple_type = EVEX_T1S;
5177   input_size_in_bits = EVEX_32bit;
5178   InstructionMark im(this);
5179   assert(dst != xnoreg, "sanity");
5180   int dst_enc = dst->encoding();
5181   // swap src<->dst for encoding
5182   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
5183   emit_int8(0x18);
5184   emit_operand(dst, src);
5185 }
5186 
5187 // duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL
5188 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
5189   assert(VM_Version::supports_evex(), "");
5190   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
5191                                      VEX_OPCODE_0F_38, true, vector_len, false, false);
5192   emit_int8(0x19);
5193   emit_int8((unsigned char)(0xC0 | encode));
5194 }
5195 
5196 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
5197   assert(VM_Version::supports_evex(), "");
5198   tuple_type = EVEX_T1S;
5199   input_size_in_bits = EVEX_64bit;
5200   InstructionMark im(this);
5201   assert(dst != xnoreg, "sanity");
5202   int dst_enc = dst->encoding();
5203   // swap src<->dst for encoding
5204   vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len);
5205   emit_int8(0x19);
5206   emit_operand(dst, src);
5207 }
5208 
// duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL
5210 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
5211   assert(VM_Version::supports_evex(), "");
5212   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
5213                                      VEX_OPCODE_0F_38, false, vector_len, false, false);
5214   emit_int8(0x7A);
5215   emit_int8((unsigned char)(0xC0 | encode));
5216 }
5217 
// duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL
5219 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
5220   assert(VM_Version::supports_evex(), "");
5221   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
5222                                      VEX_OPCODE_0F_38, false, vector_len, false, false);
5223   emit_int8(0x7B);
5224   emit_int8((unsigned char)(0xC0 | encode));
5225 }
5226 
5227 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
5228 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
5229   assert(VM_Version::supports_evex(), "");
5230   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
5231                                      VEX_OPCODE_0F_38, false, vector_len, false, false);
5232   emit_int8(0x7C);
5233   emit_int8((unsigned char)(0xC0 | encode));
5234 }
5235 
// duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL
5237 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
5238   assert(VM_Version::supports_evex(), "");
5239   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
5240                                      VEX_OPCODE_0F_38, true, vector_len, false, false);
5241   emit_int8(0x7C);
5242   emit_int8((unsigned char)(0xC0 | encode));
5243 }
5244 
5245 // Carry-Less Multiplication Quadword
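// Per the instruction definition, bit 0 of the mask selects the quadword of
// the first operand and bit 4 that of the second, so the common masks are
// 0x00 (low halves) and 0x11 (high halves).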
5246 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
5247   assert(VM_Version::supports_clmul(), "");
5248   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
5249                                       VEX_OPCODE_0F_3A, false, AVX_128bit, true);
5250   emit_int8(0x44);
5251   emit_int8((unsigned char)(0xC0 | encode));
5252   emit_int8((unsigned char)mask);
5253 }
5254 
5255 // Carry-Less Multiplication Quadword
5256 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
5257   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
5258   int vector_len = AVX_128bit;
5259   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66,
5260                                      vector_len, VEX_OPCODE_0F_3A, true);
5261   emit_int8(0x44);
5262   emit_int8((unsigned char)(0xC0 | encode));
5263   emit_int8((unsigned char)mask);
5264 }
5265 
5266 void Assembler::vzeroupper() {
5267   assert(VM_Version::supports_avx(), "");
  if (UseAVX < 3) {
5270     (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
5271     emit_int8(0x77);
5272   }
5273 }
5274 
5275 
5276 #ifndef _LP64
5277 // 32bit only pieces of the assembler
5278 
5279 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
5280   // NO PREFIX AS NEVER 64BIT
5281   InstructionMark im(this);
5282   emit_int8((unsigned char)0x81);
5283   emit_int8((unsigned char)(0xF8 | src1->encoding()));
5284   emit_data(imm32, rspec, 0);
5285 }
5286 
5287 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
5289   InstructionMark im(this);
5290   emit_int8((unsigned char)0x81);
5291   emit_operand(rdi, src1);
5292   emit_data(imm32, rspec, 0);
5293 }
5294 
// The 64-bit cmpxchg (cmpxchg8b, 32bit platforms only) compares the value at adr
// with the contents of rdx:rax and, if they are equal, stores rcx:rbx into adr;
// otherwise, the value at adr is loaded into rdx:rax.  The ZF is set if the
// compared values were equal, and cleared otherwise.
5298 void Assembler::cmpxchg8(Address adr) {
5299   InstructionMark im(this);
5300   emit_int8(0x0F);
5301   emit_int8((unsigned char)0xC7);
5302   emit_operand(rcx, adr);
5303 }
5304 
5305 void Assembler::decl(Register dst) {
5306   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_int8(0x48 | dst->encoding());
5308 }
5309 
5310 #endif // _LP64
5311 
5312 // 64bit typically doesn't use the x87 but needs to for the trig funcs
5313 
5314 void Assembler::fabs() {
5315   emit_int8((unsigned char)0xD9);
5316   emit_int8((unsigned char)0xE1);
5317 }
5318 
5319 void Assembler::fadd(int i) {
5320   emit_farith(0xD8, 0xC0, i);
5321 }
5322 
5323 void Assembler::fadd_d(Address src) {
5324   InstructionMark im(this);
5325   emit_int8((unsigned char)0xDC);
5326   emit_operand32(rax, src);
5327 }
5328 
5329 void Assembler::fadd_s(Address src) {
5330   InstructionMark im(this);
5331   emit_int8((unsigned char)0xD8);
5332   emit_operand32(rax, src);
5333 }
5334 
5335 void Assembler::fadda(int i) {
5336   emit_farith(0xDC, 0xC0, i);
5337 }
5338 
5339 void Assembler::faddp(int i) {
5340   emit_farith(0xDE, 0xC0, i);
5341 }
5342 
5343 void Assembler::fchs() {
5344   emit_int8((unsigned char)0xD9);
5345   emit_int8((unsigned char)0xE0);
5346 }
5347 
5348 void Assembler::fcom(int i) {
5349   emit_farith(0xD8, 0xD0, i);
5350 }
5351 
5352 void Assembler::fcomp(int i) {
5353   emit_farith(0xD8, 0xD8, i);
5354 }
5355 
5356 void Assembler::fcomp_d(Address src) {
5357   InstructionMark im(this);
5358   emit_int8((unsigned char)0xDC);
5359   emit_operand32(rbx, src);
5360 }
5361 
5362 void Assembler::fcomp_s(Address src) {
5363   InstructionMark im(this);
5364   emit_int8((unsigned char)0xD8);
5365   emit_operand32(rbx, src);
5366 }
5367 
5368 void Assembler::fcompp() {
5369   emit_int8((unsigned char)0xDE);
5370   emit_int8((unsigned char)0xD9);
5371 }
5372 
5373 void Assembler::fcos() {
5374   emit_int8((unsigned char)0xD9);
5375   emit_int8((unsigned char)0xFF);
5376 }
5377 
5378 void Assembler::fdecstp() {
5379   emit_int8((unsigned char)0xD9);
5380   emit_int8((unsigned char)0xF6);
5381 }
5382 
5383 void Assembler::fdiv(int i) {
5384   emit_farith(0xD8, 0xF0, i);
5385 }
5386 
5387 void Assembler::fdiv_d(Address src) {
5388   InstructionMark im(this);
5389   emit_int8((unsigned char)0xDC);
5390   emit_operand32(rsi, src);
5391 }
5392 
5393 void Assembler::fdiv_s(Address src) {
5394   InstructionMark im(this);
5395   emit_int8((unsigned char)0xD8);
5396   emit_operand32(rsi, src);
5397 }
5398 
5399 void Assembler::fdiva(int i) {
5400   emit_farith(0xDC, 0xF8, i);
5401 }
5402 
5403 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
5404 //       is erroneous for some of the floating-point instructions below.
5405 
5406 void Assembler::fdivp(int i) {
5407   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
5408 }
5409 
5410 void Assembler::fdivr(int i) {
5411   emit_farith(0xD8, 0xF8, i);
5412 }
5413 
5414 void Assembler::fdivr_d(Address src) {
5415   InstructionMark im(this);
5416   emit_int8((unsigned char)0xDC);
5417   emit_operand32(rdi, src);
5418 }
5419 
5420 void Assembler::fdivr_s(Address src) {
5421   InstructionMark im(this);
5422   emit_int8((unsigned char)0xD8);
5423   emit_operand32(rdi, src);
5424 }
5425 
5426 void Assembler::fdivra(int i) {
5427   emit_farith(0xDC, 0xF0, i);
5428 }
5429 
5430 void Assembler::fdivrp(int i) {
5431   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
5432 }
5433 
5434 void Assembler::ffree(int i) {
5435   emit_farith(0xDD, 0xC0, i);
5436 }
5437 
5438 void Assembler::fild_d(Address adr) {
5439   InstructionMark im(this);
5440   emit_int8((unsigned char)0xDF);
5441   emit_operand32(rbp, adr);
5442 }
5443 
5444 void Assembler::fild_s(Address adr) {
5445   InstructionMark im(this);
5446   emit_int8((unsigned char)0xDB);
5447   emit_operand32(rax, adr);
5448 }
5449 
5450 void Assembler::fincstp() {
5451   emit_int8((unsigned char)0xD9);
5452   emit_int8((unsigned char)0xF7);
5453 }
5454 
5455 void Assembler::finit() {
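  // Note: this is the wait form FINIT (9B DB E3, i.e. FWAIT followed by
  // FNINIT), which checks for pending unmasked x87 exceptions first.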
5456   emit_int8((unsigned char)0x9B);
5457   emit_int8((unsigned char)0xDB);
5458   emit_int8((unsigned char)0xE3);
5459 }
5460 
5461 void Assembler::fist_s(Address adr) {
5462   InstructionMark im(this);
5463   emit_int8((unsigned char)0xDB);
5464   emit_operand32(rdx, adr);
5465 }
5466 
5467 void Assembler::fistp_d(Address adr) {
5468   InstructionMark im(this);
5469   emit_int8((unsigned char)0xDF);
5470   emit_operand32(rdi, adr);
5471 }
5472 
5473 void Assembler::fistp_s(Address adr) {
5474   InstructionMark im(this);
5475   emit_int8((unsigned char)0xDB);
5476   emit_operand32(rbx, adr);
5477 }
5478 
5479 void Assembler::fld1() {
5480   emit_int8((unsigned char)0xD9);
5481   emit_int8((unsigned char)0xE8);
5482 }
5483 
5484 void Assembler::fld_d(Address adr) {
5485   InstructionMark im(this);
5486   emit_int8((unsigned char)0xDD);
5487   emit_operand32(rax, adr);
5488 }
5489 
5490 void Assembler::fld_s(Address adr) {
5491   InstructionMark im(this);
5492   emit_int8((unsigned char)0xD9);
5493   emit_operand32(rax, adr);
5494 }
5495 
5496 
5497 void Assembler::fld_s(int index) {
5498   emit_farith(0xD9, 0xC0, index);
5499 }
5500 
5501 void Assembler::fld_x(Address adr) {
5502   InstructionMark im(this);
5503   emit_int8((unsigned char)0xDB);
5504   emit_operand32(rbp, adr);
5505 }
5506 
5507 void Assembler::fldcw(Address src) {
5508   InstructionMark im(this);
5509   emit_int8((unsigned char)0xD9);
5510   emit_operand32(rbp, src);
5511 }
5512 
5513 void Assembler::fldenv(Address src) {
5514   InstructionMark im(this);
5515   emit_int8((unsigned char)0xD9);
5516   emit_operand32(rsp, src);
5517 }
5518 
5519 void Assembler::fldlg2() {
5520   emit_int8((unsigned char)0xD9);
5521   emit_int8((unsigned char)0xEC);
5522 }
5523 
5524 void Assembler::fldln2() {
5525   emit_int8((unsigned char)0xD9);
5526   emit_int8((unsigned char)0xED);
5527 }
5528 
5529 void Assembler::fldz() {
5530   emit_int8((unsigned char)0xD9);
5531   emit_int8((unsigned char)0xEE);
5532 }
5533 
5534 void Assembler::flog() {
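  // Computes ln(x) for x in ST(0): fldln2 pushes ln(2), fxch brings x back
  // to ST(0), and fyl2x leaves ST(1) * log2(ST(0)) = ln(2) * log2(x) = ln(x).
  // flog10() below is analogous with log10(2).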
5535   fldln2();
5536   fxch();
5537   fyl2x();
5538 }
5539 
5540 void Assembler::flog10() {
5541   fldlg2();
5542   fxch();
5543   fyl2x();
5544 }
5545 
5546 void Assembler::fmul(int i) {
5547   emit_farith(0xD8, 0xC8, i);
5548 }
5549 
5550 void Assembler::fmul_d(Address src) {
5551   InstructionMark im(this);
5552   emit_int8((unsigned char)0xDC);
5553   emit_operand32(rcx, src);
5554 }
5555 
5556 void Assembler::fmul_s(Address src) {
5557   InstructionMark im(this);
5558   emit_int8((unsigned char)0xD8);
5559   emit_operand32(rcx, src);
5560 }
5561 
5562 void Assembler::fmula(int i) {
5563   emit_farith(0xDC, 0xC8, i);
5564 }
5565 
5566 void Assembler::fmulp(int i) {
5567   emit_farith(0xDE, 0xC8, i);
5568 }
5569 
5570 void Assembler::fnsave(Address dst) {
5571   InstructionMark im(this);
5572   emit_int8((unsigned char)0xDD);
5573   emit_operand32(rsi, dst);
5574 }
5575 
5576 void Assembler::fnstcw(Address src) {
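  // Note: despite the name, this emits the wait form FSTCW (9B D9 /7);
  // FNSTCW proper would omit the leading 9B (FWAIT) byte.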
5577   InstructionMark im(this);
5578   emit_int8((unsigned char)0x9B);
5579   emit_int8((unsigned char)0xD9);
5580   emit_operand32(rdi, src);
5581 }
5582 
5583 void Assembler::fnstsw_ax() {
5584   emit_int8((unsigned char)0xDF);
5585   emit_int8((unsigned char)0xE0);
5586 }
5587 
5588 void Assembler::fprem() {
5589   emit_int8((unsigned char)0xD9);
5590   emit_int8((unsigned char)0xF8);
5591 }
5592 
5593 void Assembler::fprem1() {
5594   emit_int8((unsigned char)0xD9);
5595   emit_int8((unsigned char)0xF5);
5596 }
5597 
5598 void Assembler::frstor(Address src) {
5599   InstructionMark im(this);
5600   emit_int8((unsigned char)0xDD);
5601   emit_operand32(rsp, src);
5602 }
5603 
5604 void Assembler::fsin() {
5605   emit_int8((unsigned char)0xD9);
5606   emit_int8((unsigned char)0xFE);
5607 }
5608 
5609 void Assembler::fsqrt() {
5610   emit_int8((unsigned char)0xD9);
5611   emit_int8((unsigned char)0xFA);
5612 }
5613 
5614 void Assembler::fst_d(Address adr) {
5615   InstructionMark im(this);
5616   emit_int8((unsigned char)0xDD);
5617   emit_operand32(rdx, adr);
5618 }
5619 
5620 void Assembler::fst_s(Address adr) {
5621   InstructionMark im(this);
5622   emit_int8((unsigned char)0xD9);
5623   emit_operand32(rdx, adr);
5624 }
5625 
5626 void Assembler::fstp_d(Address adr) {
5627   InstructionMark im(this);
5628   emit_int8((unsigned char)0xDD);
5629   emit_operand32(rbx, adr);
5630 }
5631 
5632 void Assembler::fstp_d(int index) {
5633   emit_farith(0xDD, 0xD8, index);
5634 }
5635 
5636 void Assembler::fstp_s(Address adr) {
5637   InstructionMark im(this);
5638   emit_int8((unsigned char)0xD9);
5639   emit_operand32(rbx, adr);
5640 }
5641 
5642 void Assembler::fstp_x(Address adr) {
5643   InstructionMark im(this);
5644   emit_int8((unsigned char)0xDB);
5645   emit_operand32(rdi, adr);
5646 }
5647 
5648 void Assembler::fsub(int i) {
5649   emit_farith(0xD8, 0xE0, i);
5650 }
5651 
5652 void Assembler::fsub_d(Address src) {
5653   InstructionMark im(this);
5654   emit_int8((unsigned char)0xDC);
5655   emit_operand32(rsp, src);
5656 }
5657 
5658 void Assembler::fsub_s(Address src) {
5659   InstructionMark im(this);
5660   emit_int8((unsigned char)0xD8);
5661   emit_operand32(rsp, src);
5662 }
5663 
5664 void Assembler::fsuba(int i) {
5665   emit_farith(0xDC, 0xE8, i);
5666 }
5667 
5668 void Assembler::fsubp(int i) {
5669   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
5670 }
5671 
5672 void Assembler::fsubr(int i) {
5673   emit_farith(0xD8, 0xE8, i);
5674 }
5675 
5676 void Assembler::fsubr_d(Address src) {
5677   InstructionMark im(this);
5678   emit_int8((unsigned char)0xDC);
5679   emit_operand32(rbp, src);
5680 }
5681 
5682 void Assembler::fsubr_s(Address src) {
5683   InstructionMark im(this);
5684   emit_int8((unsigned char)0xD8);
5685   emit_operand32(rbp, src);
5686 }
5687 
5688 void Assembler::fsubra(int i) {
5689   emit_farith(0xDC, 0xE0, i);
5690 }
5691 
5692 void Assembler::fsubrp(int i) {
5693   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
5694 }
5695 
5696 void Assembler::ftan() {
5697   emit_int8((unsigned char)0xD9);
5698   emit_int8((unsigned char)0xF2);
5699   emit_int8((unsigned char)0xDD);
5700   emit_int8((unsigned char)0xD8);
5701 }
5702 
5703 void Assembler::ftst() {
5704   emit_int8((unsigned char)0xD9);
5705   emit_int8((unsigned char)0xE4);
5706 }
5707 
5708 void Assembler::fucomi(int i) {
5709   // make sure the instruction is supported (introduced for P6, together with cmov)
5710   guarantee(VM_Version::supports_cmov(), "illegal instruction");
5711   emit_farith(0xDB, 0xE8, i);
5712 }
5713 
5714 void Assembler::fucomip(int i) {
5715   // make sure the instruction is supported (introduced for P6, together with cmov)
5716   guarantee(VM_Version::supports_cmov(), "illegal instruction");
5717   emit_farith(0xDF, 0xE8, i);
5718 }
5719 
5720 void Assembler::fwait() {
5721   emit_int8((unsigned char)0x9B);
5722 }
5723 
5724 void Assembler::fxch(int i) {
5725   emit_farith(0xD9, 0xC8, i);
5726 }
5727 
5728 void Assembler::fyl2x() {
5729   emit_int8((unsigned char)0xD9);
5730   emit_int8((unsigned char)0xF1);
5731 }
5732 
5733 void Assembler::frndint() {
5734   emit_int8((unsigned char)0xD9);
5735   emit_int8((unsigned char)0xFC);
5736 }
5737 
5738 void Assembler::f2xm1() {
5739   emit_int8((unsigned char)0xD9);
5740   emit_int8((unsigned char)0xF0);
5741 }
5742 
5743 void Assembler::fldl2e() {
5744   emit_int8((unsigned char)0xD9);
5745   emit_int8((unsigned char)0xEA);
5746 }
5747 
5748 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
5749 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
5750 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
5751 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
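// So, for example, rex_prefix(adr, xreg, VEX_SIMD_F3, VEX_OPCODE_0F_38, false)
// below emits F3, then the REX prefix if one is needed, then 0F 38.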
5752 
5753 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
5754 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
5755   if (pre > 0) {
5756     emit_int8(simd_pre[pre]);
5757   }
5758   if (rex_w) {
5759     prefixq(adr, xreg);
5760   } else {
5761     prefix(adr, xreg);
5762   }
5763   if (opc > 0) {
5764     emit_int8(0x0F);
5765     int opc2 = simd_opc[opc];
5766     if (opc2 > 0) {
5767       emit_int8(opc2);
5768     }
5769   }
5770 }
5771 
5772 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
5773   if (pre > 0) {
5774     emit_int8(simd_pre[pre]);
5775   }
5776   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
5777                           prefix_and_encode(dst_enc, src_enc);
5778   if (opc > 0) {
5779     emit_int8(0x0F);
5780     int opc2 = simd_opc[opc];
5781     if (opc2 > 0) {
5782       emit_int8(opc2);
5783     }
5784   }
5785   return encode;
5786 }
5787 
5788 
5789 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) {
5790   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
5791     prefix(VEX_3bytes);
5792 
5793     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
5794     byte1 = (~byte1) & 0xE0;
5795     byte1 |= opc;
5796     emit_int8(byte1);
5797 
5798     int byte2 = ((~nds_enc) & 0xf) << 3;
5799     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
5800     emit_int8(byte2);
5801   } else {
5802     prefix(VEX_2bytes);
5803 
5804     int byte1 = vex_r ? VEX_R : 0;
5805     byte1 = (~byte1) & 0x80;
5806     byte1 |= ((~nds_enc) & 0xf) << 3;
    byte1 |= ((vector_len > 0) ? 4 : 0) | pre;
5808     emit_int8(byte1);
5809   }
5810 }
5811 
5812 // This is a 4 byte encoding
5813 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
5814                             int nds_enc, VexSimdPrefix pre, VexOpcode opc,
5815                             bool is_extended_context, bool is_merge_context,
                            int vector_len, bool no_mask_reg) {
5817   // EVEX 0x62 prefix
5818   prefix(EVEX_4bytes);
5819   evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0);
5820 
  // P0: byte 2, encoded as RXBR`00mm; built here from the raw bits and
  // then not'd below, since the encoding stores them inverted
5823   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
5824   byte2 = (~byte2) & 0xF0;
5825   // confine opc opcode extensions in mm bits to lower two bits
5826   // of form {0F, 0F_38, 0F_3A}
5827   byte2 |= opc;
5828   emit_int8(byte2);
5829 
5830   // P1: byte 3 as Wvvvv1pp
5831   int byte3 = ((~nds_enc) & 0xf) << 3;
5832   // p[10] is always 1
5833   byte3 |= EVEX_F;
5834   byte3 |= (vex_w & 1) << 7;
5835   // confine pre opcode extensions in pp bits to lower two bits
5836   // of form {66, F3, F2}
5837   byte3 |= pre;
5838   emit_int8(byte3);
5839 
5840   // P2: byte 4 as zL'Lbv'aaa
5841   int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
5842   // EVEX.v` for extending EVEX.vvvv or VIDX
5843   byte4 |= (evex_v ? 0: EVEX_V);
  // EVEX.b for broadcast actions
5845   byte4 |= (is_extended_context ? EVEX_Rb : 0);
  // EVEX.L'L for vector length: 0 is 128, 1 is 256, 2 is 512; 1024 is not currently supported
5847   byte4 |= ((vector_len) & 0x3) << 5;
5848   // last is EVEX.z for zero/merge actions
5849   byte4 |= (is_merge_context ? EVEX_Z : 0);
5850   emit_int8(byte4);
5851 }
5852 
5853 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
5854                            VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
  bool vex_r = ((xreg_enc & 8) == 8);
5856   bool vex_b = adr.base_needs_rex();
5857   bool vex_x = adr.index_needs_rex();
5858   avx_vector_len = vector_len;
5859 
  // if AVX512VL is not supported, revert to AVX (legacy mode) for vectors smaller than AVX_512bit
5861   if (VM_Version::supports_avx512vl() == false) {
5862     switch (vector_len) {
5863     case AVX_128bit:
5864     case AVX_256bit:
5865       legacy_mode = true;
5866       break;
5867     }
5868   }
5869 
  if ((UseAVX > 2) && (legacy_mode == false)) {
5872     bool evex_r = (xreg_enc >= 16);
5873     bool evex_v = (nds_enc >= 16);
5874     is_evex_instruction = true;
5875     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
5876   } else {
5877     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
5878   }
5879 }
5880 
5881 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
                                     bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
  bool vex_r = ((dst_enc & 8) == 8);
  bool vex_b = ((src_enc & 8) == 8);
5885   bool vex_x = false;
5886   avx_vector_len = vector_len;
5887 
  // if AVX512VL is not supported, revert to AVX (legacy mode) for vectors smaller than AVX_512bit
5889   if (VM_Version::supports_avx512vl() == false) {
5890     switch (vector_len) {
5891     case AVX_128bit:
5892     case AVX_256bit:
5893       legacy_mode = true;
5894       break;
5895     }
5896   }
5897 
  if ((UseAVX > 2) && (legacy_mode == false)) {
5900     bool evex_r = (dst_enc >= 16);
5901     bool evex_v = (nds_enc >= 16);
5902     // can use vex_x as bank extender on rm encoding
5903     vex_x = (src_enc >= 16);
5904     evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
5905   } else {
5906     vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
5907   }
5908 
5909   // return modrm byte components for operands
5910   return (((dst_enc & 7) << 3) | (src_enc & 7));
5911 }
5912 
5913 
5914 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
5915                             bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
5916   if (UseAVX > 0) {
5917     int xreg_enc = xreg->encoding();
5918     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
5919     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
5920   } else {
5921     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
5922     rex_prefix(adr, xreg, pre, opc, rex_w);
5923   }
5924 }
5925 
5926 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
5927                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
5928   int dst_enc = dst->encoding();
5929   int src_enc = src->encoding();
5930   if (UseAVX > 0) {
5931     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5932     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
5933   } else {
5934     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
5935     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
5936   }
5937 }
5938 
5939 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
5940                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
5941   int dst_enc = dst->encoding();
5942   int src_enc = src->encoding();
5943   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5944   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
5945 }
5946 
5947 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
5948                                       bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
5949   int dst_enc = dst->encoding();
5950   int src_enc = src->encoding();
5951   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
5952   return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
5953 }
5954 
5955 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
5956   InstructionMark im(this);
5957   simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
5958   emit_int8(opcode);
5959   emit_operand(dst, src);
5960 }
5961 
5962 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
5963   InstructionMark im(this);
5964   simd_prefix_q(dst, dst, src, pre, no_mask_reg);
5965   emit_int8(opcode);
5966   emit_operand(dst, src);
5967 }
5968 
5969 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
5970   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
5971   emit_int8(opcode);
5972   emit_int8((unsigned char)(0xC0 | encode));
5973 }
5974 
5975 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
5976   int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
5977   emit_int8(opcode);
5978   emit_int8((unsigned char)(0xC0 | encode));
5979 }
5980 
5981 // Versions with no second source register (non-destructive source).
5982 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
5983   InstructionMark im(this);
5984   simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
5985   emit_int8(opcode);
5986   emit_operand(dst, src);
5987 }
5988 
5989 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
5990   InstructionMark im(this);
5991   simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
5992   emit_int8(opcode);
5993   emit_operand(dst, src);
5994 }
5995 
5996 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
5997   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, legacy_mode, AVX_128bit);
5998   emit_int8(opcode);
5999   emit_int8((unsigned char)(0xC0 | encode));
6000 }
6001 
6002 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
6003   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
6004   emit_int8(opcode);
6005   emit_int8((unsigned char)(0xC0 | encode));
6006 }
6007 
6008 // 3-operands AVX instructions
6009 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src,
6010                                VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
6011   InstructionMark im(this);
6012   vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode);
6013   emit_int8(opcode);
6014   emit_operand(dst, src);
6015 }
6016 
6017 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
6018                                  Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
6019   InstructionMark im(this);
6020   vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
6021   emit_int8(opcode);
6022   emit_operand(dst, src);
6023 }
6024 
6025 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
6026                                VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
6027   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, false, no_mask_reg);
6028   emit_int8(opcode);
6029   emit_int8((unsigned char)(0xC0 | encode));
6030 }
6031 
6032 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
6033                                  VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
6034   int src_enc = src->encoding();
6035   int dst_enc = dst->encoding();
6036   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
6037   int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
6038   emit_int8(opcode);
6039   emit_int8((unsigned char)(0xC0 | encode));
6040 }
6041 
6042 #ifndef _LP64
6043 
6044 void Assembler::incl(Register dst) {
6045   // Don't use it directly. Use MacroAssembler::incrementl() instead.
6046   emit_int8(0x40 | dst->encoding());
6047 }
6048 
6049 void Assembler::lea(Register dst, Address src) {
6050   leal(dst, src);
6051 }
6052 
6053 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
6054   InstructionMark im(this);
6055   emit_int8((unsigned char)0xC7);
6056   emit_operand(rax, dst);
6057   emit_data((int)imm32, rspec, 0);
6058 }
6059 
6060 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
6061   InstructionMark im(this);
6062   int encode = prefix_and_encode(dst->encoding());
6063   emit_int8((unsigned char)(0xB8 | encode));
6064   emit_data((int)imm32, rspec, 0);
6065 }
6066 
6067 void Assembler::popa() { // 32bit
6068   emit_int8(0x61);
6069 }
6070 
6071 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
6072   InstructionMark im(this);
6073   emit_int8(0x68);
6074   emit_data(imm32, rspec, 0);
6075 }
6076 
6077 void Assembler::pusha() { // 32bit
6078   emit_int8(0x60);
6079 }
6080 
6081 void Assembler::set_byte_if_not_zero(Register dst) {
6082   emit_int8(0x0F);
6083   emit_int8((unsigned char)0x95);
6084   emit_int8((unsigned char)(0xE0 | dst->encoding()));
6085 }
6086 
6087 void Assembler::shldl(Register dst, Register src) {
6088   emit_int8(0x0F);
6089   emit_int8((unsigned char)0xA5);
6090   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6091 }
6092 
// 0F A4 /r ib
6094 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
6095   emit_int8(0x0F);
6096   emit_int8((unsigned char)0xA4);
6097   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6098   emit_int8(imm8);
6099 }
6100 
6101 void Assembler::shrdl(Register dst, Register src) {
6102   emit_int8(0x0F);
6103   emit_int8((unsigned char)0xAD);
6104   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
6105 }
6106 
6107 #else // LP64
6108 
6109 void Assembler::set_byte_if_not_zero(Register dst) {
6110   int enc = prefix_and_encode(dst->encoding(), true);
6111   emit_int8(0x0F);
6112   emit_int8((unsigned char)0x95);
6113   emit_int8((unsigned char)(0xE0 | enc));
6114 }
6115 
6116 // 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// addressing; it cannot be used by instructions that want an immediate value.
6119 
6120 bool Assembler::reachable(AddressLiteral adr) {
6121   int64_t disp;
  // A reloc of 'none' will force a 64bit literal to the code stream. It is
  // likely a placeholder for something that will be patched later, and we
  // need to be certain it will always be reachable.
6125   if (adr.reloc() == relocInfo::none) {
6126     return false;
6127   }
6128   if (adr.reloc() == relocInfo::internal_word_type) {
6129     // This should be rip relative and easily reachable.
6130     return true;
6131   }
6132   if (adr.reloc() == relocInfo::virtual_call_type ||
6133       adr.reloc() == relocInfo::opt_virtual_call_type ||
6134       adr.reloc() == relocInfo::static_call_type ||
6135       adr.reloc() == relocInfo::static_stub_type ) {
6136     // This should be rip relative within the code cache and easily
6137     // reachable until we get huge code caches. (At which point
6138     // ic code is going to have issues).
6139     return true;
6140   }
6141   if (adr.reloc() != relocInfo::external_word_type &&
6142       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
6143       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
6144       adr.reloc() != relocInfo::runtime_call_type ) {
6145     return false;
6146   }
6147 
6148   // Stress the correction code
6149   if (ForceUnreachable) {
    // Must be a runtime_call reloc; see if it is in the code cache.
6151     // Flipping stuff in the codecache to be unreachable causes issues
6152     // with things like inline caches where the additional instructions
6153     // are not handled.
6154     if (CodeCache::find_blob(adr._target) == NULL) {
6155       return false;
6156     }
6157   }
  // For external_word_type/runtime_call_type, if the target is reachable both
  // from where we are now (possibly a temp buffer) and from anywhere we might
  // end up in the codeCache, then we are always reachable.
  // This would have to change to be more pessimistic if we ever save/restore
  // shared code.
6163   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
6164   if (!is_simm32(disp)) return false;
6165   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
6166   if (!is_simm32(disp)) return false;
6167 
6168   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
6169 
6170   // Because rip relative is a disp + address_of_next_instruction and we
6171   // don't know the value of address_of_next_instruction we apply a fudge factor
6172   // to make sure we will be ok no matter the size of the instruction we get placed into.
6173   // We don't have to fudge the checks above here because they are already worst case.
6174 
  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, 4-byte literal
6176   // + 4 because better safe than sorry.
6177   const int fudge = 12 + 4;
6178   if (disp < 0) {
6179     disp -= fudge;
6180   } else {
6181     disp += fudge;
6182   }
6183   return is_simm32(disp);
6184 }
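
// Worked example of the fudge arithmetic above (all addresses hypothetical):
// with adr._target == 0x7f0000080000 and a code cache spanning
// [0x7f0000000000, 0x7f0000c00000), both bound checks pass, so the answer
// comes down to the pc-relative check:
//
//   int64_t disp = (int64_t)0x7f0000080000 - ((int64_t)pc() + sizeof(int));
//   disp += (disp < 0) ? -16 : 16;   // fudge == 12 + 4 from above
//   return is_simm32(disp);          // true while pc() stays inside the cache
//
// Only the arithmetic mirrors the function; the numbers are made up.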
6185 
6186 // Returns true if the polling page is not reachable from the code cache
6187 // using rip-relative addressing.
6188 bool Assembler::is_polling_page_far() {
6189   intptr_t addr = (intptr_t)os::get_polling_page();
6190   return ForceUnreachable ||
6191          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
6192          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
6193 }
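
// Example (hypothetical addresses): with the polling page at 0x7f1000000000
// and the code cache at [0x7f0000000000, 0x7f0000c00000), addr - low_bound is
// 0x1000000000, which does not fit in a simm32, so the page is "far" and
// callers must materialize its address in a register instead of using a
// rip-relative operand.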
6194 
6195 void Assembler::emit_data64(jlong data,
6196                             relocInfo::relocType rtype,
6197                             int format) {
6198   if (rtype == relocInfo::none) {
6199     emit_int64(data);
6200   } else {
6201     emit_data64(data, Relocation::spec_simple(rtype), format);
6202   }
6203 }
6204 
6205 void Assembler::emit_data64(jlong data,
6206                             RelocationHolder const& rspec,
6207                             int format) {
6208   assert(imm_operand == 0, "default format must be immediate in this file");
6209   assert(imm_operand == format, "must be immediate");
6210   assert(inst_mark() != NULL, "must be inside InstructionMark");
6211   // Do not use AbstractAssembler::relocate, which is not intended for
6212   // embedded words.  Instead, relocate to the enclosing instruction.
6213   code_section()->relocate(inst_mark(), rspec, format);
6214 #ifdef ASSERT
6215   check_relocation(rspec, format);
6216 #endif
6217   emit_int64(data);
6218 }
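
// A typical caller wraps the whole instruction in an InstructionMark so the
// relocation above attaches to the instruction start rather than to the
// embedded word.  Sketch of the pattern (mov_literal64() below is the real
// instance of it):
//
//   InstructionMark im(this);
//   int encode = prefixq_and_encode(dst->encoding());
//   emit_int8((unsigned char)(0xB8 | encode));  // REX.W + B8+r: movq dst, imm64
//   emit_data64(imm64, rspec);                  // relocates at inst_mark()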
6219 
6220 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
6221   if (reg_enc >= 8) {
6222     prefix(REX_B);
6223     reg_enc -= 8;
6224   } else if (byteinst && reg_enc >= 4) {
6225     prefix(REX);
6226   }
6227   return reg_enc;
6228 }
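
// Illustrative cases for the byteinst logic above (REX semantics per the
// x86-64 manuals; the calls are hypothetical):
//   prefix_and_encode(rax->encoding(), true) -> no prefix, returns 0 (AL)
//   prefix_and_encode(rsi->encoding(), true) -> emits REX (0x40), returns 6;
//       without a REX prefix, encoding 6 in a byte op selects DH, not SIL
//   prefix_and_encode(r9->encoding(), true)  -> emits REX_B, returns 1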
6229 
6230 int Assembler::prefixq_and_encode(int reg_enc) {
6231   if (reg_enc < 8) {
6232     prefix(REX_W);
6233   } else {
6234     prefix(REX_WB);
6235     reg_enc -= 8;
6236   }
6237   return reg_enc;
6238 }
6239 
6240 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
6241   if (dst_enc < 8) {
6242     if (src_enc >= 8) {
6243       prefix(REX_B);
6244       src_enc -= 8;
6245     } else if (byteinst && src_enc >= 4) {
6246       prefix(REX);
6247     }
6248   } else {
6249     if (src_enc < 8) {
6250       prefix(REX_R);
6251     } else {
6252       prefix(REX_RB);
6253       src_enc -= 8;
6254     }
6255     dst_enc -= 8;
6256   }
6257   return dst_enc << 3 | src_enc;
6258 }
6259 
6260 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
6261   if (dst_enc < 8) {
6262     if (src_enc < 8) {
6263       prefix(REX_W);
6264     } else {
6265       prefix(REX_WB);
6266       src_enc -= 8;
6267     }
6268   } else {
6269     if (src_enc < 8) {
6270       prefix(REX_WR);
6271     } else {
6272       prefix(REX_WRB);
6273       src_enc -= 8;
6274     }
6275     dst_enc -= 8;
6276   }
6277   return dst_enc << 3 | src_enc;
6278 }
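
// The value returned above is the ModRM reg/rm pair (reg = dst, rm = src)
// with any high register bits folded into the REX prefix.  A hypothetical
// walk-through:
//   prefixq_and_encode(rax->encoding(), r8->encoding())
//     -> emits REX_WB (0x49) and returns 0 << 3 | 0 == 0x00;
//        the caller then completes the ModRM byte as 0xC0 | 0x00.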
6279 
6280 void Assembler::prefix(Register reg) {
6281   if (reg->encoding() >= 8) {
6282     prefix(REX_B);
6283   }
6284 }
6285 
6286 void Assembler::prefix(Register dst, Register src, Prefix p) {
6287   if (src->encoding() >= 8) {
6288     p = (Prefix)(p | REX_B);
6289   }
6290   if (dst->encoding() >= 8) {
6291     p = (Prefix)( p | REX_R);
6292   }
6293   if (p != Prefix_EMPTY) {
6294     // do not generate an empty prefix
6295     prefix(p);
6296   }
6297 }
6298 
6299 void Assembler::prefix(Register dst, Address adr, Prefix p) {
6300   if (adr.base_needs_rex()) {
6301     if (adr.index_needs_rex()) {
6302       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support an index register (REX.X)");
6303     } else {
6304       prefix(REX_B);
6305     }
6306   } else {
6307     if (adr.index_needs_rex()) {
6308       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support an index register (REX.X)");
6309     }
6310   }
6311   if (dst->encoding() >= 8) {
6312     p = (Prefix)(p | REX_R);
6313   }
6314   if (p != Prefix_EMPTY) {
6315     // do not generate an empty prefix
6316     prefix(p);
6317   }
6318 }
6319 
6320 void Assembler::prefix(Address adr) {
6321   if (adr.base_needs_rex()) {
6322     if (adr.index_needs_rex()) {
6323       prefix(REX_XB);
6324     } else {
6325       prefix(REX_B);
6326     }
6327   } else {
6328     if (adr.index_needs_rex()) {
6329       prefix(REX_X);
6330     }
6331   }
6332 }
6333 
6334 void Assembler::prefixq(Address adr) {
6335   if (adr.base_needs_rex()) {
6336     if (adr.index_needs_rex()) {
6337       prefix(REX_WXB);
6338     } else {
6339       prefix(REX_WB);
6340     }
6341   } else {
6342     if (adr.index_needs_rex()) {
6343       prefix(REX_WX);
6344     } else {
6345       prefix(REX_W);
6346     }
6347   }
6348 }
6349 
6350 
6351 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
6352   if (reg->encoding() < 8) {
6353     if (adr.base_needs_rex()) {
6354       if (adr.index_needs_rex()) {
6355         prefix(REX_XB);
6356       } else {
6357         prefix(REX_B);
6358       }
6359     } else {
6360       if (adr.index_needs_rex()) {
6361         prefix(REX_X);
6362       } else if (byteinst && reg->encoding() >= 4 ) {
6363         prefix(REX);
6364       }
6365     }
6366   } else {
6367     if (adr.base_needs_rex()) {
6368       if (adr.index_needs_rex()) {
6369         prefix(REX_RXB);
6370       } else {
6371         prefix(REX_RB);
6372       }
6373     } else {
6374       if (adr.index_needs_rex()) {
6375         prefix(REX_RX);
6376       } else {
6377         prefix(REX_R);
6378       }
6379     }
6380   }
6381 }
6382 
6383 void Assembler::prefixq(Address adr, Register src) {
6384   if (src->encoding() < 8) {
6385     if (adr.base_needs_rex()) {
6386       if (adr.index_needs_rex()) {
6387         prefix(REX_WXB);
6388       } else {
6389         prefix(REX_WB);
6390       }
6391     } else {
6392       if (adr.index_needs_rex()) {
6393         prefix(REX_WX);
6394       } else {
6395         prefix(REX_W);
6396       }
6397     }
6398   } else {
6399     if (adr.base_needs_rex()) {
6400       if (adr.index_needs_rex()) {
6401         prefix(REX_WRXB);
6402       } else {
6403         prefix(REX_WRB);
6404       }
6405     } else {
6406       if (adr.index_needs_rex()) {
6407         prefix(REX_WRX);
6408       } else {
6409         prefix(REX_WR);
6410       }
6411     }
6412   }
6413 }
6414 
6415 void Assembler::prefix(Address adr, XMMRegister reg) {
6416   if (reg->encoding() < 8) {
6417     if (adr.base_needs_rex()) {
6418       if (adr.index_needs_rex()) {
6419         prefix(REX_XB);
6420       } else {
6421         prefix(REX_B);
6422       }
6423     } else {
6424       if (adr.index_needs_rex()) {
6425         prefix(REX_X);
6426       }
6427     }
6428   } else {
6429     if (adr.base_needs_rex()) {
6430       if (adr.index_needs_rex()) {
6431         prefix(REX_RXB);
6432       } else {
6433         prefix(REX_RB);
6434       }
6435     } else {
6436       if (adr.index_needs_rex()) {
6437         prefix(REX_RX);
6438       } else {
6439         prefix(REX_R);
6440       }
6441     }
6442   }
6443 }
6444 
6445 void Assembler::prefixq(Address adr, XMMRegister src) {
6446   if (src->encoding() < 8) {
6447     if (adr.base_needs_rex()) {
6448       if (adr.index_needs_rex()) {
6449         prefix(REX_WXB);
6450       } else {
6451         prefix(REX_WB);
6452       }
6453     } else {
6454       if (adr.index_needs_rex()) {
6455         prefix(REX_WX);
6456       } else {
6457         prefix(REX_W);
6458       }
6459     }
6460   } else {
6461     if (adr.base_needs_rex()) {
6462       if (adr.index_needs_rex()) {
6463         prefix(REX_WRXB);
6464       } else {
6465         prefix(REX_WRB);
6466       }
6467     } else {
6468       if (adr.index_needs_rex()) {
6469         prefix(REX_WRX);
6470       } else {
6471         prefix(REX_WR);
6472       }
6473     }
6474   }
6475 }
6476 
6477 void Assembler::adcq(Register dst, int32_t imm32) {
6478   (void) prefixq_and_encode(dst->encoding());
6479   emit_arith(0x81, 0xD0, dst, imm32);
6480 }
6481 
6482 void Assembler::adcq(Register dst, Address src) {
6483   InstructionMark im(this);
6484   prefixq(src, dst);
6485   emit_int8(0x13);
6486   emit_operand(dst, src);
6487 }
6488 
6489 void Assembler::adcq(Register dst, Register src) {
6490   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6491   emit_arith(0x13, 0xC0, dst, src);
6492 }
6493 
6494 void Assembler::addq(Address dst, int32_t imm32) {
6495   InstructionMark im(this);
6496   prefixq(dst);
6497   emit_arith_operand(0x81, rax, dst, imm32);
6498 }
6499 
6500 void Assembler::addq(Address dst, Register src) {
6501   InstructionMark im(this);
6502   prefixq(dst, src);
6503   emit_int8(0x01);
6504   emit_operand(src, dst);
6505 }
6506 
6507 void Assembler::addq(Register dst, int32_t imm32) {
6508   (void) prefixq_and_encode(dst->encoding());
6509   emit_arith(0x81, 0xC0, dst, imm32);
6510 }
6511 
6512 void Assembler::addq(Register dst, Address src) {
6513   InstructionMark im(this);
6514   prefixq(src, dst);
6515   emit_int8(0x03);
6516   emit_operand(dst, src);
6517 }
6518 
6519 void Assembler::addq(Register dst, Register src) {
6520   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6521   emit_arith(0x03, 0xC0, dst, src);
6522 }
6523 
6524 void Assembler::adcxq(Register dst, Register src) {
6525   //assert(VM_Version::supports_adx(), "adx instructions not supported");
6526   emit_int8((unsigned char)0x66);
6527   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6528   emit_int8(0x0F);
6529   emit_int8(0x38);
6530   emit_int8((unsigned char)0xF6);
6531   emit_int8((unsigned char)(0xC0 | encode));
6532 }
6533 
6534 void Assembler::adoxq(Register dst, Register src) {
6535   //assert(VM_Version::supports_adx(), "adx instructions not supported");
6536   emit_int8((unsigned char)0xF3);
6537   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6538   emit_int8(0x0F);
6539   emit_int8(0x38);
6540   emit_int8((unsigned char)0xF6);
6541   emit_int8((unsigned char)(0xC0 | encode));
6542 }
6543 
6544 void Assembler::andq(Address dst, int32_t imm32) {
6545   InstructionMark im(this);
6546   prefixq(dst);
6547   emit_int8((unsigned char)0x81);
6548   emit_operand(rsp, dst, 4);
6549   emit_int32(imm32);
6550 }
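
// Note on the rsp argument above: in the immediate forms the ModRM reg field
// is an opcode extension, not a register.  The convention in this file is to
// pass the Register whose encoding equals the /digit from the Intel manuals,
// so rsp (encoding 4) stands for /4 == AND.  Sketch of the bytes for a
// hypothetical andq(Address(rbx, 8), 0x7f):
//   48 81 63 08 7f 00 00 00   (REX.W, 0x81, ModRM /4 [rbx+8], imm32)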
6551 
6552 void Assembler::andq(Register dst, int32_t imm32) {
6553   (void) prefixq_and_encode(dst->encoding());
6554   emit_arith(0x81, 0xE0, dst, imm32);
6555 }
6556 
6557 void Assembler::andq(Register dst, Address src) {
6558   InstructionMark im(this);
6559   prefixq(src, dst);
6560   emit_int8(0x23);
6561   emit_operand(dst, src);
6562 }
6563 
6564 void Assembler::andq(Register dst, Register src) {
6565   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6566   emit_arith(0x23, 0xC0, dst, src);
6567 }
6568 
6569 void Assembler::andnq(Register dst, Register src1, Register src2) {
6570   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6571   int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2);
6572   emit_int8((unsigned char)0xF2);
6573   emit_int8((unsigned char)(0xC0 | encode));
6574 }
6575 
6576 void Assembler::andnq(Register dst, Register src1, Address src2) {
6577   InstructionMark im(this);
6578   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6579   vex_prefix_0F38_q_legacy(dst, src1, src2);
6580   emit_int8((unsigned char)0xF2);
6581   emit_operand(dst, src2);
6582 }
6583 
6584 void Assembler::bsfq(Register dst, Register src) {
6585   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6586   emit_int8(0x0F);
6587   emit_int8((unsigned char)0xBC);
6588   emit_int8((unsigned char)(0xC0 | encode));
6589 }
6590 
6591 void Assembler::bsrq(Register dst, Register src) {
6592   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6593   emit_int8(0x0F);
6594   emit_int8((unsigned char)0xBD);
6595   emit_int8((unsigned char)(0xC0 | encode));
6596 }
6597 
6598 void Assembler::bswapq(Register reg) {
6599   int encode = prefixq_and_encode(reg->encoding());
6600   emit_int8(0x0F);
6601   emit_int8((unsigned char)(0xC8 | encode));
6602 }
6603 
6604 void Assembler::blsiq(Register dst, Register src) {
6605   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6606   int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src);
6607   emit_int8((unsigned char)0xF3);
6608   emit_int8((unsigned char)(0xC0 | encode));
6609 }
6610 
6611 void Assembler::blsiq(Register dst, Address src) {
6612   InstructionMark im(this);
6613   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6614   vex_prefix_0F38_q_legacy(rbx, dst, src);
6615   emit_int8((unsigned char)0xF3);
6616   emit_operand(rbx, src);
6617 }
6618 
6619 void Assembler::blsmskq(Register dst, Register src) {
6620   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6621   int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src);
6622   emit_int8((unsigned char)0xF3);
6623   emit_int8((unsigned char)(0xC0 | encode));
6624 }
6625 
6626 void Assembler::blsmskq(Register dst, Address src) {
6627   InstructionMark im(this);
6628   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6629   vex_prefix_0F38_q_legacy(rdx, dst, src);
6630   emit_int8((unsigned char)0xF3);
6631   emit_operand(rdx, src);
6632 }
6633 
6634 void Assembler::blsrq(Register dst, Register src) {
6635   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6636   int encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src);
6637   emit_int8((unsigned char)0xF3);
6638   emit_int8((unsigned char)(0xC0 | encode));
6639 }
6640 
6641 void Assembler::blsrq(Register dst, Address src) {
6642   InstructionMark im(this);
6643   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
6644   vex_prefix_0F38_q_legacy(rcx, dst, src);
6645   emit_int8((unsigned char)0xF3);
6646   emit_operand(rcx, src);
6647 }
6648 
6649 void Assembler::cdqq() {
6650   prefix(REX_W);
6651   emit_int8((unsigned char)0x99);
6652 }
6653 
6654 void Assembler::clflush(Address adr) {
6655   prefix(adr);
6656   emit_int8(0x0F);
6657   emit_int8((unsigned char)0xAE);
6658   emit_operand(rdi, adr);
6659 }
6660 
6661 void Assembler::cmovq(Condition cc, Register dst, Register src) {
6662   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6663   emit_int8(0x0F);
6664   emit_int8(0x40 | cc);
6665   emit_int8((unsigned char)(0xC0 | encode));
6666 }
6667 
6668 void Assembler::cmovq(Condition cc, Register dst, Address src) {
6669   InstructionMark im(this);
6670   prefixq(src, dst);
6671   emit_int8(0x0F);
6672   emit_int8(0x40 | cc);
6673   emit_operand(dst, src);
6674 }
6675 
6676 void Assembler::cmpq(Address dst, int32_t imm32) {
6677   InstructionMark im(this);
6678   prefixq(dst);
6679   emit_int8((unsigned char)0x81);
6680   emit_operand(rdi, dst, 4);
6681   emit_int32(imm32);
6682 }
6683 
6684 void Assembler::cmpq(Register dst, int32_t imm32) {
6685   (void) prefixq_and_encode(dst->encoding());
6686   emit_arith(0x81, 0xF8, dst, imm32);
6687 }
6688 
6689 void Assembler::cmpq(Address dst, Register src) {
6690   InstructionMark im(this);
6691   prefixq(dst, src);
6692   emit_int8(0x39);  // CMP r/m64, r64 (0x3B here would swap the operands)
6693   emit_operand(src, dst);
6694 }
6695 
6696 void Assembler::cmpq(Register dst, Register src) {
6697   (void) prefixq_and_encode(dst->encoding(), src->encoding());
6698   emit_arith(0x3B, 0xC0, dst, src);
6699 }
6700 
6701 void Assembler::cmpq(Register dst, Address  src) {
6702   InstructionMark im(this);
6703   prefixq(src, dst);
6704   emit_int8(0x3B);
6705   emit_operand(dst, src);
6706 }
6707 
6708 void Assembler::cmpxchgq(Register reg, Address adr) {
6709   InstructionMark im(this);
6710   prefixq(adr, reg);
6711   emit_int8(0x0F);
6712   emit_int8((unsigned char)0xB1);
6713   emit_operand(reg, adr);
6714 }
6715 
6716 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
6717   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6718   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true);
6719   emit_int8(0x2A);
6720   emit_int8((unsigned char)(0xC0 | encode));
6721 }
6722 
6723 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
6724   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6725   if (VM_Version::supports_evex()) {
6726     tuple_type = EVEX_T1S;
6727     input_size_in_bits = EVEX_32bit;
6728   }
6729   InstructionMark im(this);
6730   simd_prefix_q(dst, dst, src, VEX_SIMD_F2, true);
6731   emit_int8(0x2A);
6732   emit_operand(dst, src);
6733 }
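
// The tuple_type/input_size_in_bits recorded above drive the EVEX disp8*N
// compression when the operand is emitted: for the Tuple1-Scalar, 32-bit
// input case the Intel manuals give N == 4, so a memory displacement that is
// a multiple of 4 can be encoded in a single byte as disp/4.  This comment
// only describes the mechanism; no extra bytes are emitted here.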
6734 
6735 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
6736   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6737   if (VM_Version::supports_evex()) {
6738     tuple_type = EVEX_T1S;
6739     input_size_in_bits = EVEX_32bit;
6740   }
6741   InstructionMark im(this);
6742   simd_prefix_q(dst, dst, src, VEX_SIMD_F3, true);
6743   emit_int8(0x2A);
6744   emit_operand(dst, src);
6745 }
6746 
6747 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
6748   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6749   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true);
6750   emit_int8(0x2C);
6751   emit_int8((unsigned char)(0xC0 | encode));
6752 }
6753 
6754 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
6755   NOT_LP64(assert(VM_Version::supports_sse(), ""));
6756   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true);
6757   emit_int8(0x2C);
6758   emit_int8((unsigned char)(0xC0 | encode));
6759 }
6760 
6761 void Assembler::decl(Register dst) {
6762   // Don't use it directly. Use MacroAssembler::decrementl() instead.
6763   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
6764   int encode = prefix_and_encode(dst->encoding());
6765   emit_int8((unsigned char)0xFF);
6766   emit_int8((unsigned char)(0xC8 | encode));
6767 }
6768 
6769 void Assembler::decq(Register dst) {
6770   // Don't use it directly. Use MacroAssembler::decrementq() instead.
6771   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
6772   int encode = prefixq_and_encode(dst->encoding());
6773   emit_int8((unsigned char)0xFF);
6774   emit_int8((unsigned char)(0xC8 | encode));
6775 }
6776 
6777 void Assembler::decq(Address dst) {
6778   // Don't use it directly. Use MacroAssembler::decrementq() instead.
6779   InstructionMark im(this);
6780   prefixq(dst);
6781   emit_int8((unsigned char)0xFF);
6782   emit_operand(rcx, dst);
6783 }
6784 
6785 void Assembler::fxrstor(Address src) {
6786   prefixq(src);
6787   emit_int8(0x0F);
6788   emit_int8((unsigned char)0xAE);
6789   emit_operand(as_Register(1), src);
6790 }
6791 
6792 void Assembler::fxsave(Address dst) {
6793   prefixq(dst);
6794   emit_int8(0x0F);
6795   emit_int8((unsigned char)0xAE);
6796   emit_operand(as_Register(0), dst);
6797 }
6798 
6799 void Assembler::idivq(Register src) {
6800   int encode = prefixq_and_encode(src->encoding());
6801   emit_int8((unsigned char)0xF7);
6802   emit_int8((unsigned char)(0xF8 | encode));
6803 }
6804 
6805 void Assembler::imulq(Register dst, Register src) {
6806   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6807   emit_int8(0x0F);
6808   emit_int8((unsigned char)0xAF);
6809   emit_int8((unsigned char)(0xC0 | encode));
6810 }
6811 
6812 void Assembler::imulq(Register dst, Register src, int value) {
6813   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6814   if (is8bit(value)) {
6815     emit_int8(0x6B);
6816     emit_int8((unsigned char)(0xC0 | encode));
6817     emit_int8(value & 0xFF);
6818   } else {
6819     emit_int8(0x69);
6820     emit_int8((unsigned char)(0xC0 | encode));
6821     emit_int32(value);
6822   }
6823 }
6824 
6825 void Assembler::imulq(Register dst, Address src) {
6826   InstructionMark im(this);
6827   prefixq(src, dst);
6828   emit_int8(0x0F);
6829   emit_int8((unsigned char)0xAF);
6830   emit_operand(dst, src);
6831 }
6832 
6833 void Assembler::incl(Register dst) {
6834   // Don't use it directly. Use MacroAssembler::incrementl() instead.
6835   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
6836   int encode = prefix_and_encode(dst->encoding());
6837   emit_int8((unsigned char)0xFF);
6838   emit_int8((unsigned char)(0xC0 | encode));
6839 }
6840 
6841 void Assembler::incq(Register dst) {
6842   // Don't use it directly. Use MacroAssembler::incrementq() instead.
6843   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
6844   int encode = prefixq_and_encode(dst->encoding());
6845   emit_int8((unsigned char)0xFF);
6846   emit_int8((unsigned char)(0xC0 | encode));
6847 }
6848 
6849 void Assembler::incq(Address dst) {
6850   // Don't use it directly. Use MacroAssembler::incrementq() instead.
6851   InstructionMark im(this);
6852   prefixq(dst);
6853   emit_int8((unsigned char)0xFF);
6854   emit_operand(rax, dst);
6855 }
6856 
6857 void Assembler::lea(Register dst, Address src) {
6858   leaq(dst, src);
6859 }
6860 
6861 void Assembler::leaq(Register dst, Address src) {
6862   InstructionMark im(this);
6863   prefixq(src, dst);
6864   emit_int8((unsigned char)0x8D);
6865   emit_operand(dst, src);
6866 }
6867 
6868 void Assembler::mov64(Register dst, int64_t imm64) {
6869   InstructionMark im(this);
6870   int encode = prefixq_and_encode(dst->encoding());
6871   emit_int8((unsigned char)(0xB8 | encode));
6872   emit_int64(imm64);
6873 }
6874 
6875 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
6876   InstructionMark im(this);
6877   int encode = prefixq_and_encode(dst->encoding());
6878   emit_int8((unsigned char)(0xB8 | encode));
6879   emit_data64(imm64, rspec);
6880 }
6881 
6882 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
6883   InstructionMark im(this);
6884   int encode = prefix_and_encode(dst->encoding());
6885   emit_int8((unsigned char)(0xB8 | encode));
6886   emit_data((int)imm32, rspec, narrow_oop_operand);
6887 }
6888 
6889 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
6890   InstructionMark im(this);
6891   prefix(dst);
6892   emit_int8((unsigned char)0xC7);
6893   emit_operand(rax, dst, 4);
6894   emit_data((int)imm32, rspec, narrow_oop_operand);
6895 }
6896 
6897 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
6898   InstructionMark im(this);
6899   int encode = prefix_and_encode(src1->encoding());
6900   emit_int8((unsigned char)0x81);
6901   emit_int8((unsigned char)(0xF8 | encode));
6902   emit_data((int)imm32, rspec, narrow_oop_operand);
6903 }
6904 
6905 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
6906   InstructionMark im(this);
6907   prefix(src1);
6908   emit_int8((unsigned char)0x81);
6909   emit_operand(rdi, src1, 4);  // 0x81 /7 == CMP (rax would encode /0 == ADD)
6910   emit_data((int)imm32, rspec, narrow_oop_operand);
6911 }
6912 
6913 void Assembler::lzcntq(Register dst, Register src) {
6914   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
6915   emit_int8((unsigned char)0xF3);
6916   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6917   emit_int8(0x0F);
6918   emit_int8((unsigned char)0xBD);
6919   emit_int8((unsigned char)(0xC0 | encode));
6920 }
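
// Ordering note for the encoding above: the F3 mandatory prefix has to come
// before the REX prefix in the byte stream (REX must immediately precede the
// opcode), which is why 0xF3 is emitted before prefixq_and_encode().  Without
// F3 the remaining bytes decode as BSR, hence the wording of the assert.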
6921 
6922 void Assembler::movdq(XMMRegister dst, Register src) {
6923   // table D-1 says MMX/SSE2
6924   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6925   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, true);
6926   emit_int8(0x6E);
6927   emit_int8((unsigned char)(0xC0 | encode));
6928 }
6929 
6930 void Assembler::movdq(Register dst, XMMRegister src) {
6931   // table D-1 says MMX/SSE2
6932   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6933   // swap src/dst to get correct prefix
6934   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, true);
6935   emit_int8(0x7E);
6936   emit_int8((unsigned char)(0xC0 | encode));
6937 }
6938 
6939 void Assembler::movq(Register dst, Register src) {
6940   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6941   emit_int8((unsigned char)0x8B);
6942   emit_int8((unsigned char)(0xC0 | encode));
6943 }
6944 
6945 void Assembler::movq(Register dst, Address src) {
6946   InstructionMark im(this);
6947   prefixq(src, dst);
6948   emit_int8((unsigned char)0x8B);
6949   emit_operand(dst, src);
6950 }
6951 
6952 void Assembler::movq(Address dst, Register src) {
6953   InstructionMark im(this);
6954   prefixq(dst, src);
6955   emit_int8((unsigned char)0x89);
6956   emit_operand(src, dst);
6957 }
6958 
6959 void Assembler::movsbq(Register dst, Address src) {
6960   InstructionMark im(this);
6961   prefixq(src, dst);
6962   emit_int8(0x0F);
6963   emit_int8((unsigned char)0xBE);
6964   emit_operand(dst, src);
6965 }
6966 
6967 void Assembler::movsbq(Register dst, Register src) {
6968   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
6969   emit_int8(0x0F);
6970   emit_int8((unsigned char)0xBE);
6971   emit_int8((unsigned char)(0xC0 | encode));
6972 }
6973 
6974 void Assembler::movslq(Register dst, int32_t imm32) {
6975   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
6976   // and movslq(r8, 3) as movl     $0x0000000048000000,(%rbx);
6977   // as a result we shouldn't use this form until it has been tested at runtime...
6978   ShouldNotReachHere();
6979   InstructionMark im(this);
6980   int encode = prefixq_and_encode(dst->encoding());
6981   emit_int8((unsigned char)(0xC7 | encode));
6982   emit_int32(imm32);
6983 }
6984 
6985 void Assembler::movslq(Address dst, int32_t imm32) {
6986   assert(is_simm32(imm32), "lost bits");
6987   InstructionMark im(this);
6988   prefixq(dst);
6989   emit_int8((unsigned char)0xC7);
6990   emit_operand(rax, dst, 4);
6991   emit_int32(imm32);
6992 }
6993 
6994 void Assembler::movslq(Register dst, Address src) {
6995   InstructionMark im(this);
6996   prefixq(src, dst);
6997   emit_int8(0x63);
6998   emit_operand(dst, src);
6999 }
7000 
7001 void Assembler::movslq(Register dst, Register src) {
7002   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7003   emit_int8(0x63);
7004   emit_int8((unsigned char)(0xC0 | encode));
7005 }
7006 
7007 void Assembler::movswq(Register dst, Address src) {
7008   InstructionMark im(this);
7009   prefixq(src, dst);
7010   emit_int8(0x0F);
7011   emit_int8((unsigned char)0xBF);
7012   emit_operand(dst, src);
7013 }
7014 
7015 void Assembler::movswq(Register dst, Register src) {
7016   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7017   emit_int8((unsigned char)0x0F);
7018   emit_int8((unsigned char)0xBF);
7019   emit_int8((unsigned char)(0xC0 | encode));
7020 }
7021 
7022 void Assembler::movzbq(Register dst, Address src) {
7023   InstructionMark im(this);
7024   prefixq(src, dst);
7025   emit_int8((unsigned char)0x0F);
7026   emit_int8((unsigned char)0xB6);
7027   emit_operand(dst, src);
7028 }
7029 
7030 void Assembler::movzbq(Register dst, Register src) {
7031   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7032   emit_int8(0x0F);
7033   emit_int8((unsigned char)0xB6);
7034   emit_int8((unsigned char)(0xC0 | encode));
7035 }
7036 
7037 void Assembler::movzwq(Register dst, Address src) {
7038   InstructionMark im(this);
7039   prefixq(src, dst);
7040   emit_int8((unsigned char)0x0F);
7041   emit_int8((unsigned char)0xB7);
7042   emit_operand(dst, src);
7043 }
7044 
7045 void Assembler::movzwq(Register dst, Register src) {
7046   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7047   emit_int8((unsigned char)0x0F);
7048   emit_int8((unsigned char)0xB7);
7049   emit_int8((unsigned char)(0xC0 | encode));
7050 }
7051 
7052 void Assembler::mulq(Address src) {
7053   InstructionMark im(this);
7054   prefixq(src);
7055   emit_int8((unsigned char)0xF7);
7056   emit_operand(rsp, src);
7057 }
7058 
7059 void Assembler::mulq(Register src) {
7060   int encode = prefixq_and_encode(src->encoding());
7061   emit_int8((unsigned char)0xF7);
7062   emit_int8((unsigned char)(0xE0 | encode));
7063 }
7064 
7065 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
7066   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
7067   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(),
7068                                      VEX_SIMD_F2, VEX_OPCODE_0F_38, true, AVX_128bit, true, false);
7069   emit_int8((unsigned char)0xF6);
7070   emit_int8((unsigned char)(0xC0 | encode));
7071 }
7072 
7073 void Assembler::negq(Register dst) {
7074   int encode = prefixq_and_encode(dst->encoding());
7075   emit_int8((unsigned char)0xF7);
7076   emit_int8((unsigned char)(0xD8 | encode));
7077 }
7078 
7079 void Assembler::notq(Register dst) {
7080   int encode = prefixq_and_encode(dst->encoding());
7081   emit_int8((unsigned char)0xF7);
7082   emit_int8((unsigned char)(0xD0 | encode));
7083 }
7084 
7085 void Assembler::orq(Address dst, int32_t imm32) {
7086   InstructionMark im(this);
7087   prefixq(dst);
7088   emit_int8((unsigned char)0x81);
7089   emit_operand(rcx, dst, 4);
7090   emit_int32(imm32);
7091 }
7092 
7093 void Assembler::orq(Register dst, int32_t imm32) {
7094   (void) prefixq_and_encode(dst->encoding());
7095   emit_arith(0x81, 0xC8, dst, imm32);
7096 }
7097 
7098 void Assembler::orq(Register dst, Address src) {
7099   InstructionMark im(this);
7100   prefixq(src, dst);
7101   emit_int8(0x0B);
7102   emit_operand(dst, src);
7103 }
7104 
7105 void Assembler::orq(Register dst, Register src) {
7106   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7107   emit_arith(0x0B, 0xC0, dst, src);
7108 }
7109 
7110 void Assembler::popa() { // 64bit
7111   movq(r15, Address(rsp, 0));
7112   movq(r14, Address(rsp, wordSize));
7113   movq(r13, Address(rsp, 2 * wordSize));
7114   movq(r12, Address(rsp, 3 * wordSize));
7115   movq(r11, Address(rsp, 4 * wordSize));
7116   movq(r10, Address(rsp, 5 * wordSize));
7117   movq(r9,  Address(rsp, 6 * wordSize));
7118   movq(r8,  Address(rsp, 7 * wordSize));
7119   movq(rdi, Address(rsp, 8 * wordSize));
7120   movq(rsi, Address(rsp, 9 * wordSize));
7121   movq(rbp, Address(rsp, 10 * wordSize));
7122   // skip rsp
7123   movq(rbx, Address(rsp, 12 * wordSize));
7124   movq(rdx, Address(rsp, 13 * wordSize));
7125   movq(rcx, Address(rsp, 14 * wordSize));
7126   movq(rax, Address(rsp, 15 * wordSize));
7127 
7128   addq(rsp, 16 * wordSize);
7129 }
7130 
7131 void Assembler::popcntq(Register dst, Address src) {
7132   assert(VM_Version::supports_popcnt(), "must support");
7133   InstructionMark im(this);
7134   emit_int8((unsigned char)0xF3);
7135   prefixq(src, dst);
7136   emit_int8((unsigned char)0x0F);
7137   emit_int8((unsigned char)0xB8);
7138   emit_operand(dst, src);
7139 }
7140 
7141 void Assembler::popcntq(Register dst, Register src) {
7142   assert(VM_Version::supports_popcnt(), "must support");
7143   emit_int8((unsigned char)0xF3);
7144   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7145   emit_int8((unsigned char)0x0F);
7146   emit_int8((unsigned char)0xB8);
7147   emit_int8((unsigned char)(0xC0 | encode));
7148 }
7149 
7150 void Assembler::popq(Address dst) {
7151   InstructionMark im(this);
7152   prefixq(dst);
7153   emit_int8((unsigned char)0x8F);
7154   emit_operand(rax, dst);
7155 }
7156 
7157 void Assembler::pusha() { // 64bit
7158   // we have to store the original rsp.  The ABI says that the 128 bytes
7159   // below rsp (the red zone) are local scratch.
7160   movq(Address(rsp, -5 * wordSize), rsp);
7161 
7162   subq(rsp, 16 * wordSize);
7163 
7164   movq(Address(rsp, 15 * wordSize), rax);
7165   movq(Address(rsp, 14 * wordSize), rcx);
7166   movq(Address(rsp, 13 * wordSize), rdx);
7167   movq(Address(rsp, 12 * wordSize), rbx);
7168   // skip rsp
7169   movq(Address(rsp, 10 * wordSize), rbp);
7170   movq(Address(rsp, 9 * wordSize), rsi);
7171   movq(Address(rsp, 8 * wordSize), rdi);
7172   movq(Address(rsp, 7 * wordSize), r8);
7173   movq(Address(rsp, 6 * wordSize), r9);
7174   movq(Address(rsp, 5 * wordSize), r10);
7175   movq(Address(rsp, 4 * wordSize), r11);
7176   movq(Address(rsp, 3 * wordSize), r12);
7177   movq(Address(rsp, 2 * wordSize), r13);
7178   movq(Address(rsp, wordSize), r14);
7179   movq(Address(rsp, 0), r15);
7180 }
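
// Resulting layout relative to the post-subq rsp (slot i == Address(rsp,
// i * wordSize)):
//   slot 15: rax   slot 14: rcx   slot 13: rdx   slot 12: rbx
//   slot 11: the original rsp, stored via the -5 * wordSize red-zone write
//            above (-5 + 16 == 11)
//   slot 10: rbp   slots 9..0: rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15
// popa() above reads the same slots back and skips slot 11.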
7181 
7182 void Assembler::pushq(Address src) {
7183   InstructionMark im(this);
7184   prefixq(src);
7185   emit_int8((unsigned char)0xFF);
7186   emit_operand(rsi, src);
7187 }
7188 
7189 void Assembler::rclq(Register dst, int imm8) {
7190   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7191   int encode = prefixq_and_encode(dst->encoding());
7192   if (imm8 == 1) {
7193     emit_int8((unsigned char)0xD1);
7194     emit_int8((unsigned char)(0xD0 | encode));
7195   } else {
7196     emit_int8((unsigned char)0xC1);
7197     emit_int8((unsigned char)(0xD0 | encode));
7198     emit_int8(imm8);
7199   }
7200 }
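
// Why "imm8 >> 1" in the assert above: isShiftCount() is the 32-bit
// predicate and only accepts counts in 0..31, while 64-bit rotates and
// shifts take counts up to 63.  Halving the count reuses the same check:
// 63 >> 1 == 31 passes, 64 >> 1 == 32 fails.  The same idiom appears in
// rcrq(), rorq(), sarq(), shlq() and shrq() below.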
7201 
7202 void Assembler::rcrq(Register dst, int imm8) {
7203   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7204   int encode = prefixq_and_encode(dst->encoding());
7205   if (imm8 == 1) {
7206     emit_int8((unsigned char)0xD1);
7207     emit_int8((unsigned char)(0xD8 | encode));
7208   } else {
7209     emit_int8((unsigned char)0xC1);
7210     emit_int8((unsigned char)(0xD8 | encode));
7211     emit_int8(imm8);
7212   }
7213 }
7214 
7215 void Assembler::rorq(Register dst, int imm8) {
7216   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7217   int encode = prefixq_and_encode(dst->encoding());
7218   if (imm8 == 1) {
7219     emit_int8((unsigned char)0xD1);
7220     emit_int8((unsigned char)(0xC8 | encode));
7221   } else {
7222     emit_int8((unsigned char)0xC1);
7223     emit_int8((unsigned char)(0xC8 | encode));
7224     emit_int8(imm8);
7225   }
7226 }
7227 
7228 void Assembler::rorxq(Register dst, Register src, int imm8) {
7229   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
7230   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2,
7231                                      VEX_OPCODE_0F_3A, true, AVX_128bit, true, false);
7232   emit_int8((unsigned char)0xF0);
7233   emit_int8((unsigned char)(0xC0 | encode));
7234   emit_int8(imm8);
7235 }
7236 
7237 void Assembler::sarq(Register dst, int imm8) {
7238   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7239   int encode = prefixq_and_encode(dst->encoding());
7240   if (imm8 == 1) {
7241     emit_int8((unsigned char)0xD1);
7242     emit_int8((unsigned char)(0xF8 | encode));
7243   } else {
7244     emit_int8((unsigned char)0xC1);
7245     emit_int8((unsigned char)(0xF8 | encode));
7246     emit_int8(imm8);
7247   }
7248 }
7249 
7250 void Assembler::sarq(Register dst) {
7251   int encode = prefixq_and_encode(dst->encoding());
7252   emit_int8((unsigned char)0xD3);
7253   emit_int8((unsigned char)(0xF8 | encode));
7254 }
7255 
7256 void Assembler::sbbq(Address dst, int32_t imm32) {
7257   InstructionMark im(this);
7258   prefixq(dst);
7259   emit_arith_operand(0x81, rbx, dst, imm32);
7260 }
7261 
7262 void Assembler::sbbq(Register dst, int32_t imm32) {
7263   (void) prefixq_and_encode(dst->encoding());
7264   emit_arith(0x81, 0xD8, dst, imm32);
7265 }
7266 
7267 void Assembler::sbbq(Register dst, Address src) {
7268   InstructionMark im(this);
7269   prefixq(src, dst);
7270   emit_int8(0x1B);
7271   emit_operand(dst, src);
7272 }
7273 
7274 void Assembler::sbbq(Register dst, Register src) {
7275   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7276   emit_arith(0x1B, 0xC0, dst, src);
7277 }
7278 
7279 void Assembler::shlq(Register dst, int imm8) {
7280   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7281   int encode = prefixq_and_encode(dst->encoding());
7282   if (imm8 == 1) {
7283     emit_int8((unsigned char)0xD1);
7284     emit_int8((unsigned char)(0xE0 | encode));
7285   } else {
7286     emit_int8((unsigned char)0xC1);
7287     emit_int8((unsigned char)(0xE0 | encode));
7288     emit_int8(imm8);
7289   }
7290 }
7291 
7292 void Assembler::shlq(Register dst) {
7293   int encode = prefixq_and_encode(dst->encoding());
7294   emit_int8((unsigned char)0xD3);
7295   emit_int8((unsigned char)(0xE0 | encode));
7296 }
7297 
7298 void Assembler::shrq(Register dst, int imm8) {
7299   assert(isShiftCount(imm8 >> 1), "illegal shift count");
7300   int encode = prefixq_and_encode(dst->encoding());
7301   emit_int8((unsigned char)0xC1);
7302   emit_int8((unsigned char)(0xE8 | encode));
7303   emit_int8(imm8);
7304 }
7305 
7306 void Assembler::shrq(Register dst) {
7307   int encode = prefixq_and_encode(dst->encoding());
7308   emit_int8((unsigned char)0xD3);
7309   emit_int8((unsigned char)(0xE8 | encode));
7310 }
7311 
7312 void Assembler::subq(Address dst, int32_t imm32) {
7313   InstructionMark im(this);
7314   prefixq(dst);
7315   emit_arith_operand(0x81, rbp, dst, imm32);
7316 }
7317 
7318 void Assembler::subq(Address dst, Register src) {
7319   InstructionMark im(this);
7320   prefixq(dst, src);
7321   emit_int8(0x29);
7322   emit_operand(src, dst);
7323 }
7324 
7325 void Assembler::subq(Register dst, int32_t imm32) {
7326   (void) prefixq_and_encode(dst->encoding());
7327   emit_arith(0x81, 0xE8, dst, imm32);
7328 }
7329 
7330 // Force generation of a 4 byte immediate value even if it fits into 8bit
7331 void Assembler::subq_imm32(Register dst, int32_t imm32) {
7332   (void) prefixq_and_encode(dst->encoding());
7333   emit_arith_imm32(0x81, 0xE8, dst, imm32);
7334 }
7335 
7336 void Assembler::subq(Register dst, Address src) {
7337   InstructionMark im(this);
7338   prefixq(src, dst);
7339   emit_int8(0x2B);
7340   emit_operand(dst, src);
7341 }
7342 
7343 void Assembler::subq(Register dst, Register src) {
7344   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7345   emit_arith(0x2B, 0xC0, dst, src);
7346 }
7347 
7348 void Assembler::testq(Register dst, int32_t imm32) {
7349   // not using emit_arith because test
7350   // doesn't support sign-extension of
7351   // 8bit operands
7352   int encode = dst->encoding();
7353   if (encode == 0) {
7354     prefix(REX_W);
7355     emit_int8((unsigned char)0xA9);
7356   } else {
7357     encode = prefixq_and_encode(encode);
7358     emit_int8((unsigned char)0xF7);
7359     emit_int8((unsigned char)(0xC0 | encode));
7360   }
7361   emit_int32(imm32);
7362 }
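
// Illustrative byte sequences for the two paths above (hypothetical calls):
//   testq(rax, 0x100) -> 48 A9 00 01 00 00      (short accumulator form)
//   testq(rcx, 0x100) -> 48 F7 C1 00 01 00 00   (general F7 /0 form)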
7363 
7364 void Assembler::testq(Register dst, Register src) {
7365   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7366   emit_arith(0x85, 0xC0, dst, src);
7367 }
7368 
7369 void Assembler::xaddq(Address dst, Register src) {
7370   InstructionMark im(this);
7371   prefixq(dst, src);
7372   emit_int8(0x0F);
7373   emit_int8((unsigned char)0xC1);
7374   emit_operand(src, dst);
7375 }
7376 
7377 void Assembler::xchgq(Register dst, Address src) {
7378   InstructionMark im(this);
7379   prefixq(src, dst);
7380   emit_int8((unsigned char)0x87);
7381   emit_operand(dst, src);
7382 }
7383 
7384 void Assembler::xchgq(Register dst, Register src) {
7385   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
7386   emit_int8((unsigned char)0x87);
7387   emit_int8((unsigned char)(0xC0 | encode));
7388 }
7389 
7390 void Assembler::xorq(Register dst, Register src) {
7391   (void) prefixq_and_encode(dst->encoding(), src->encoding());
7392   emit_arith(0x33, 0xC0, dst, src);
7393 }
7394 
7395 void Assembler::xorq(Register dst, Address src) {
7396   InstructionMark im(this);
7397   prefixq(src, dst);
7398   emit_int8(0x33);
7399   emit_operand(dst, src);
7400 }
7401 
7402 #endif // !LP64