/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifndef SERIALGC
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
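
// Illustration: an AddressLiteral built with external_word_type carries a
// relocation for a fixed external address, e.g. (assuming `counter_addr`
// holds the address of some VM global):
//   AddressLiteral lit(counter_addr, relocInfo::external_word_type);
// whereas the oop/metadata case above deliberately records no relocation,
// leaving a bare literal that stays patchable.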

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // _LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}
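
// Illustration: since 4 is rsp's encoding, an index of 4 means "no index";
// e.g. make_raw(5 /*rbp*/, 4, 0, 8, relocInfo::none) builds [rbp + 8] with
// noreg as the index register.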

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
        emit_long(data);
  else  emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.
    // Hack: call32 is too wide for the mask, so use disp32.
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

static int encode(XMMRegister r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}
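
// Illustration: the extended registers (r8..r15, xmm8..xmm15) keep only their
// low three bits in the ModRM/SIB fields, which is why encode() drops the
// high bit; e.g. encode(r9) returns 1, and the dropped bit travels in the
// REX.B/REX.R/REX.X prefix bit instead.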

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_long(imm32);
  }
}
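
// Illustration: addl(rbx, 5) reaches here as emit_arith(0x81, 0xC0, rbx, 5);
// the immediate fits in 8 bits, so the bytes are 83 C3 05 (op1 | 0x02, then
// op2 | encode(rbx), then the imm8). A wide immediate such as 0x12345678
// instead yields 81 C3 78 56 34 12.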

// Force generation of a 4-byte immediate value even if it fits into 8 bits.
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_long(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int8(op1);
  emit_int8(op2 | encode(dst) << 3 | encode(src));
}
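
// Illustration: addl(rdx, rcx) reaches here as emit_arith(0x03, 0xC0, rdx, rcx)
// and emits 03 D1, the ModRM byte being 0xC0 | encode(rdx) << 3 | encode(rcx)
// = 0xC0 | 0x10 | 0x01.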


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
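
// Illustration of the [base + index*scale + imm8] path above: with reg = rcx,
// base = rax, index = rbx, scale = times_4 and disp = 12, the operand bytes
// are ModRM 0x4C (0x44 | encode(rcx) << 3), SIB 0x98 (2 << 6 |
// encode(rbx) << 3 | encode(rax)), then the disp8 0x0C.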

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)
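  // Illustration: "case REP4(0x00):" expands to
  // "case (0x00)+0: case (0x00)+1: case (0x00)+2: case (0x00)+3:",
  // covering opcodes 0x00..0x03; REP8 and REP16 likewise cover runs of
  // 8 and 16 opcodes.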

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions,
    // but those have the 0x0F prefix and are handled by the 0x0F case above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them, bits [7:6] are set in the VEX second byte, since
    // a ModRM byte cannot be of the form 11xxxxxx in 32-bit mode. To set
    // those bits, the REX and vvvv bits are stored inverted.
    //
    // Fortunately C2 doesn't generate these instructions, so we don't need
    // to check for them in the product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // Compute tail_size so we can find the end of the instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
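
// Illustration: for a 5-byte "call rel32" (E8 xx xx xx xx) at inst,
// locate_operand(inst, call32_operand) returns inst + 1 (the start of the
// disp32), and locate_next_instruction(inst) returns inst + 5.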


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}
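
// Illustration: emit_farith(0xD8, 0xC0, i) encodes "fadd st(0), st(i)" as the
// two bytes D8 C0+i; the assert above keeps the x87 stack offset in 0..7.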


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDE);
  emit_int8(0xC0 | encode);
}

void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}

void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}

void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDC);
  emit_int8(0xC0 | encode);
}

void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}

void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rsp, dst, 4);
  emit_long(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_int8((unsigned char)0x99);
}

void Assembler::cld() {
  emit_int8((unsigned char)0xFC);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rdi, dst, 1);
  emit_int8(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_int8(0x66);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if the values are equal; otherwise, the value at
// adr is loaded into rax. The ZF is set if the compared values were equal,
// and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}
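
// Illustration (sketch): a compare-and-swap typically pairs this with the
// lock prefix, e.g.
//   lock();
//   cmpxchgl(new_val, Address(obj, offset));
// where new_val, obj and offset stand in for the caller's register and
// address values.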

void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely, ucomisd comes out correct.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::cpuid() {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA2);
}

void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
}

void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtss2sd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}


void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rcx, dst);
}

void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_int8(0x0F);
  emit_int8(0x77);
}

void Assembler::hlt() {
  emit_int8((unsigned char)0xF4);
}

void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF8 | encode));
}

void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF0 | encode));
}

void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAF);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_int8(0x6B);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int8(value & 0xFF);
  } else {
    emit_int8(0x69);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_long(value);
  }
}

void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rax, dst);
}

void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)pc();
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_int8(0x70 | cc);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_int8(0x0F);
      emit_int8((unsigned char)(0x80 | cc));
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if the condition
    //       is the same; however, that seems to be a rather unlikely case.
    // Note: use jccb() if the label to be bound is very close, to get
    //       an 8-bit displacement.
    L.add_patch_at(code(), locator());
    emit_int8(0x0F);
    emit_int8((unsigned char)(0x80 | cc));
    emit_long(0);
  }
}
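
// Illustration: for a label bound a few bytes back with cc == equal (0x4),
// the short form applies and 0x70 | cc gives 0x74 (je) followed by the 8-bit
// displacement; an unbound label always gets the 6-byte long form above.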

void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
#endif
    intptr_t offs = (intptr_t)entry - (intptr_t)pc();
    // 0111 tttn #8-bit disp
    emit_int8(0x70 | cc);
    emit_int8((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_int8(0x70 | cc);
    emit_int8(0);
  }
}

void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsp, adr);
}

void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - pc();
    if (maybe_short && is8bit(offs - short_size)) {
      emit_int8((unsigned char)0xEB);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      emit_int8((unsigned char)0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will stay within the range of an 8-bit displacement,
    // use jmpb to force the short form.
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_int8((unsigned char)0xE9);
    emit_long(0);
  }
}

void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xE0 | encode));
}

void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}

void Assembler::jmpb(Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
1542 #endif
1543     intptr_t offs = entry - pc();
1544     emit_int8((unsigned char)0xEB);
1545     emit_int8((offs - short_size) & 0xFF);
1546   } else {
1547     InstructionMark im(this);
1548     L.add_patch_at(code(), locator());
1549     emit_int8((unsigned char)0xEB);
1550     emit_int8(0);
1551   }
1552 }
1553 
1554 void Assembler::ldmxcsr( Address src) {
1555   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1556   InstructionMark im(this);
1557   prefix(src);
1558   emit_int8(0x0F);
1559   emit_int8((unsigned char)0xAE);
1560   emit_operand(as_Register(2), src);
1561 }
1562 
1563 void Assembler::leal(Register dst, Address src) {
1564   InstructionMark im(this);
1565 #ifdef _LP64
1566   emit_int8(0x67); // addr32
1567   prefix(src, dst);
1568 #endif // LP64
1569   emit_int8((unsigned char)0x8D);
1570   emit_operand(dst, src);
1571 }
1572 
1573 void Assembler::lfence() {
1574   emit_int8(0x0F);
1575   emit_int8((unsigned char)0xAE);
1576   emit_int8((unsigned char)0xE8);
1577 }
1578 
1579 void Assembler::lock() {
1580   emit_int8((unsigned char)0xF0);
1581 }
1582 
1583 void Assembler::lzcntl(Register dst, Register src) {
1584   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
1585   emit_int8((unsigned char)0xF3);
1586   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1587   emit_int8(0x0F);
1588   emit_int8((unsigned char)0xBD);
1589   emit_int8((unsigned char)(0xC0 | encode));
1590 }
1591 
1592 // Emit mfence instruction
1593 void Assembler::mfence() {
1594   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
1595   emit_int8(0x0F);
1596   emit_int8((unsigned char)0xAE);
1597   emit_int8((unsigned char)0xF0);
1598 }
1599 
1600 void Assembler::mov(Register dst, Register src) {
1601   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1602 }
1603 
1604 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1605   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1606   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
1607 }
1608 
1609 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1610   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1611   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
1612 }
1613 
1614 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
1615   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1616   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
1617   emit_int8(0x16);
1618   emit_int8((unsigned char)(0xC0 | encode));
1619 }
1620 
1621 void Assembler::movb(Register dst, Address src) {
1622   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1623   InstructionMark im(this);
1624   prefix(src, dst, true);
1625   emit_int8((unsigned char)0x8A);
1626   emit_operand(dst, src);
1627 }
1628 
1629 
1630 void Assembler::movb(Address dst, int imm8) {
1631   InstructionMark im(this);
1632   prefix(dst);
1633   emit_int8((unsigned char)0xC6);
1634   emit_operand(rax, dst, 1);
1635   emit_int8(imm8);
1636 }
1637 
1638 
1639 void Assembler::movb(Address dst, Register src) {
1640   assert(src->has_byte_register(), "must have byte register");
1641   InstructionMark im(this);
1642   prefix(dst, src, true);
1643   emit_int8((unsigned char)0x88);
1644   emit_operand(src, dst);
1645 }
1646 
1647 void Assembler::movdl(XMMRegister dst, Register src) {
1648   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1649   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
1650   emit_int8(0x6E);
1651   emit_int8((unsigned char)(0xC0 | encode));
1652 }
1653 
1654 void Assembler::movdl(Register dst, XMMRegister src) {
1655   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1656   // swap src/dst to get correct prefix
1657   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
1658   emit_int8(0x7E);
1659   emit_int8((unsigned char)(0xC0 | encode));
1660 }
1661 
1662 void Assembler::movdl(XMMRegister dst, Address src) {
1663   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1664   InstructionMark im(this);
1665   simd_prefix(dst, src, VEX_SIMD_66);
1666   emit_int8(0x6E);
1667   emit_operand(dst, src);
1668 }
1669 
1670 void Assembler::movdl(Address dst, XMMRegister src) {
1671   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1672   InstructionMark im(this);
1673   simd_prefix(dst, src, VEX_SIMD_66);
1674   emit_int8(0x7E);
1675   emit_operand(src, dst);
1676 }
1677 
1678 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1679   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1680   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
1681 }
1682 
1683 void Assembler::movdqu(XMMRegister dst, Address src) {
1684   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1685   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1686 }
1687 
1688 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1689   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1690   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1691 }
1692 
1693 void Assembler::movdqu(Address dst, XMMRegister src) {
1694   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1695   InstructionMark im(this);
1696   simd_prefix(dst, src, VEX_SIMD_F3);
1697   emit_int8(0x7F);
1698   emit_operand(src, dst);
1699 }
1700 
1701 // Move unaligned 256-bit vector
1702 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
1703   assert(UseAVX, "");
1704   bool vector256 = true;
1705   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
1706   emit_int8(0x6F);
1707   emit_int8((unsigned char)(0xC0 | encode));
1708 }
1709 
1710 void Assembler::vmovdqu(XMMRegister dst, Address src) {
1711   assert(UseAVX, "");
1712   InstructionMark im(this);
1713   bool vector256 = true;
1714   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
1715   emit_int8(0x6F);
1716   emit_operand(dst, src);
1717 }
1718 
1719 void Assembler::vmovdqu(Address dst, XMMRegister src) {
1720   assert(UseAVX, "");
1721   InstructionMark im(this);
1722   bool vector256 = true;
1723   // swap src<->dst for encoding
1724   assert(src != xnoreg, "sanity");
1725   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
1726   emit_int8(0x7F);
1727   emit_operand(src, dst);
1728 }
1729 
1730 // The 32-bit moves below zero-extend the destination register on 64-bit.
1731 
1732 void Assembler::movl(Register dst, int32_t imm32) {
1733   int encode = prefix_and_encode(dst->encoding());
1734   emit_int8((unsigned char)(0xB8 | encode));
1735   emit_long(imm32);
1736 }
1737 
1738 void Assembler::movl(Register dst, Register src) {
1739   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1740   emit_int8((unsigned char)0x8B);
1741   emit_int8((unsigned char)(0xC0 | encode));
1742 }
1743 
1744 void Assembler::movl(Register dst, Address src) {
1745   InstructionMark im(this);
1746   prefix(src, dst);
1747   emit_int8((unsigned char)0x8B);
1748   emit_operand(dst, src);
1749 }
1750 
1751 void Assembler::movl(Address dst, int32_t imm32) {
1752   InstructionMark im(this);
1753   prefix(dst);
1754   emit_int8((unsigned char)0xC7);
1755   emit_operand(rax, dst, 4);
1756   emit_long(imm32);
1757 }
1758 
1759 void Assembler::movl(Address dst, Register src) {
1760   InstructionMark im(this);
1761   prefix(dst, src);
1762   emit_int8((unsigned char)0x89);
1763   emit_operand(src, dst);
1764 }
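// A standalone model of that zero extension (illustration only): on x86-64,
// any write to a 32-bit register clears bits 63:32 of the full register, so
// the movl forms above never leave stale upper bits behind.
#if 0 // illustration only
#include <stdint.h>
static uint64_t movl_effect(uint32_t imm32) {
  return (uint64_t)imm32;  // movl dst, imm32 leaves bits 63:32 zero
}
#endif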
1765 
1766 // Newer CPUs require movsd and movss to avoid a partial register stall
1767 // when loading from memory. But for the old Opteron, use movlpd instead
1768 // of movsd. The selection is done in MacroAssembler::movdbl() and movflt().
1769 void Assembler::movlpd(XMMRegister dst, Address src) {
1770   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1771   emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
1772 }
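// A minimal sketch of the selection described above, mirroring the shape of
// MacroAssembler::movdbl(); treat it as illustrative rather than the verbatim
// implementation (the flag name UseXmmLoadAndClearUpper is the HotSpot one):
#if 0 // illustration only
static void movdbl_sketch(MacroAssembler* masm, XMMRegister dst, Address src) {
  if (UseXmmLoadAndClearUpper) {
    masm->movsd(dst, src);   // loads 64 bits and clears the upper half - no stall
  } else {
    masm->movlpd(dst, src);  // old Opteron: merge into the low half instead
  }
}
#endif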
1773 
1774 void Assembler::movq( MMXRegister dst, Address src ) {
1775   assert( VM_Version::supports_mmx(), "" );
1776   emit_int8(0x0F);
1777   emit_int8(0x6F);
1778   emit_operand(dst, src);
1779 }
1780 
1781 void Assembler::movq( Address dst, MMXRegister src ) {
1782   assert( VM_Version::supports_mmx(), "" );
1783   emit_int8(0x0F);
1784   emit_int8(0x7F);
1785   // workaround gcc (3.2.1-7a) bug
1786   // In that version of gcc with only an emit_operand(MMX, Address)
1787   // gcc will tail jump and try and reverse the parameters completely
1788   // obliterating dst in the process. By having a version available
1789   // that doesn't need to swap the args at the tail jump the bug is
1790   // avoided.
1791   emit_operand(dst, src);
1792 }
1793 
1794 void Assembler::movq(XMMRegister dst, Address src) {
1795   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1796   InstructionMark im(this);
1797   simd_prefix(dst, src, VEX_SIMD_F3);
1798   emit_int8(0x7E);
1799   emit_operand(dst, src);
1800 }
1801 
1802 void Assembler::movq(Address dst, XMMRegister src) {
1803   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1804   InstructionMark im(this);
1805   simd_prefix(dst, src, VEX_SIMD_66);
1806   emit_int8((unsigned char)0xD6);
1807   emit_operand(src, dst);
1808 }
1809 
1810 void Assembler::movsbl(Register dst, Address src) { // movsxb
1811   InstructionMark im(this);
1812   prefix(src, dst);
1813   emit_int8(0x0F);
1814   emit_int8((unsigned char)0xBE);
1815   emit_operand(dst, src);
1816 }
1817 
1818 void Assembler::movsbl(Register dst, Register src) { // movsxb
1819   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1820   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1821   emit_int8(0x0F);
1822   emit_int8((unsigned char)0xBE);
1823   emit_int8((unsigned char)(0xC0 | encode));
1824 }
1825 
1826 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1827   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1828   emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
1829 }
1830 
1831 void Assembler::movsd(XMMRegister dst, Address src) {
1832   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1833   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
1834 }
1835 
1836 void Assembler::movsd(Address dst, XMMRegister src) {
1837   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1838   InstructionMark im(this);
1839   simd_prefix(dst, src, VEX_SIMD_F2);
1840   emit_int8(0x11);
1841   emit_operand(src, dst);
1842 }
1843 
1844 void Assembler::movss(XMMRegister dst, XMMRegister src) {
1845   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1846   emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
1847 }
1848 
1849 void Assembler::movss(XMMRegister dst, Address src) {
1850   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1851   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
1852 }
1853 
1854 void Assembler::movss(Address dst, XMMRegister src) {
1855   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1856   InstructionMark im(this);
1857   simd_prefix(dst, src, VEX_SIMD_F3);
1858   emit_int8(0x11);
1859   emit_operand(src, dst);
1860 }
1861 
1862 void Assembler::movswl(Register dst, Address src) { // movsxw
1863   InstructionMark im(this);
1864   prefix(src, dst);
1865   emit_int8(0x0F);
1866   emit_int8((unsigned char)0xBF);
1867   emit_operand(dst, src);
1868 }
1869 
1870 void Assembler::movswl(Register dst, Register src) { // movsxw
1871   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1872   emit_int8(0x0F);
1873   emit_int8((unsigned char)0xBF);
1874   emit_int8((unsigned char)(0xC0 | encode));
1875 }
1876 
1877 void Assembler::movw(Address dst, int imm16) {
1878   InstructionMark im(this);
1879 
1880   emit_int8(0x66); // switch to 16-bit mode
1881   prefix(dst);
1882   emit_int8((unsigned char)0xC7);
1883   emit_operand(rax, dst, 2);
1884   emit_int16(imm16);
1885 }
1886 
1887 void Assembler::movw(Register dst, Address src) {
1888   InstructionMark im(this);
1889   emit_int8(0x66);
1890   prefix(src, dst);
1891   emit_int8((unsigned char)0x8B);
1892   emit_operand(dst, src);
1893 }
1894 
1895 void Assembler::movw(Address dst, Register src) {
1896   InstructionMark im(this);
1897   emit_int8(0x66);
1898   prefix(dst, src);
1899   emit_int8((unsigned char)0x89);
1900   emit_operand(src, dst);
1901 }
1902 
1903 void Assembler::movzbl(Register dst, Address src) { // movzxb
1904   InstructionMark im(this);
1905   prefix(src, dst);
1906   emit_int8(0x0F);
1907   emit_int8((unsigned char)0xB6);
1908   emit_operand(dst, src);
1909 }
1910 
1911 void Assembler::movzbl(Register dst, Register src) { // movzxb
1912   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1913   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1914   emit_int8(0x0F);
1915   emit_int8((unsigned char)0xB6);
1916   emit_int8((unsigned char)(0xC0 | encode));
1917 }
1918 
1919 void Assembler::movzwl(Register dst, Address src) { // movzxw
1920   InstructionMark im(this);
1921   prefix(src, dst);
1922   emit_int8(0x0F);
1923   emit_int8((unsigned char)0xB7);
1924   emit_operand(dst, src);
1925 }
1926 
1927 void Assembler::movzwl(Register dst, Register src) { // movzxw
1928   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1929   emit_int8(0x0F);
1930   emit_int8((unsigned char)0xB7);
1931   emit_int8((unsigned char)(0xC0 | encode));
1932 }
1933 
1934 void Assembler::mull(Address src) {
1935   InstructionMark im(this);
1936   prefix(src);
1937   emit_int8((unsigned char)0xF7);
1938   emit_operand(rsp, src);
1939 }
1940 
1941 void Assembler::mull(Register src) {
1942   int encode = prefix_and_encode(src->encoding());
1943   emit_int8((unsigned char)0xF7);
1944   emit_int8((unsigned char)(0xE0 | encode));
1945 }
1946 
1947 void Assembler::mulsd(XMMRegister dst, Address src) {
1948   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1949   emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
1950 }
1951 
1952 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
1953   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1954   emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
1955 }
1956 
1957 void Assembler::mulss(XMMRegister dst, Address src) {
1958   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1959   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
1960 }
1961 
1962 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
1963   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1964   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
1965 }
1966 
1967 void Assembler::negl(Register dst) {
1968   int encode = prefix_and_encode(dst->encoding());
1969   emit_int8((unsigned char)0xF7);
1970   emit_int8((unsigned char)(0xD8 | encode));
1971 }
1972 
1973 void Assembler::nop(int i) {
1974 #ifdef ASSERT
1975   assert(i > 0, " ");
1976   // The fancy nops aren't currently recognized by debuggers, making it a
1977   // pain to disassemble code while debugging. If asserts are on, speed is
1978   // clearly not an issue, so simply use the traditional single-byte nop
1979   // for alignment.
1980 
1981   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
1982   return;
1983 
1984 #endif // ASSERT
1985 
1986   if (UseAddressNop && VM_Version::is_intel()) {
1987     //
1988     // Using multi-byte nops "0x0F 0x1F [address]" for Intel
1989     //  1: 0x90
1990     //  2: 0x66 0x90
1991     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1992     //  4: 0x0F 0x1F 0x40 0x00
1993     //  5: 0x0F 0x1F 0x44 0x00 0x00
1994     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1995     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1996     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1997     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1998     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1999     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2000 
2001     // The remaining encodings are Intel-specific - don't use consecutive address nops
2002 
2003     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2004     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2005     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2006     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
2007 
2008     while(i >= 15) {
2009       // For Intel don't generate consecutive address nops (mix with regular nops)
2010       i -= 15;
2011       emit_int8(0x66);   // size prefix
2012       emit_int8(0x66);   // size prefix
2013       emit_int8(0x66);   // size prefix
2014       addr_nop_8();
2015       emit_int8(0x66);   // size prefix
2016       emit_int8(0x66);   // size prefix
2017       emit_int8(0x66);   // size prefix
2018       emit_int8((unsigned char)0x90);
2019                          // nop
2020     }
2021     switch (i) {
2022       case 14:
2023         emit_int8(0x66); // size prefix
2024       case 13:
2025         emit_int8(0x66); // size prefix
2026       case 12:
2027         addr_nop_8();
2028         emit_int8(0x66); // size prefix
2029         emit_int8(0x66); // size prefix
2030         emit_int8(0x66); // size prefix
2031         emit_int8((unsigned char)0x90);
2032                          // nop
2033         break;
2034       case 11:
2035         emit_int8(0x66); // size prefix
2036       case 10:
2037         emit_int8(0x66); // size prefix
2038       case 9:
2039         emit_int8(0x66); // size prefix
2040       case 8:
2041         addr_nop_8();
2042         break;
2043       case 7:
2044         addr_nop_7();
2045         break;
2046       case 6:
2047         emit_int8(0x66); // size prefix
2048       case 5:
2049         addr_nop_5();
2050         break;
2051       case 4:
2052         addr_nop_4();
2053         break;
2054       case 3:
2055         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2056         emit_int8(0x66); // size prefix
2057       case 2:
2058         emit_int8(0x66); // size prefix
2059       case 1:
2060         emit_int8((unsigned char)0x90);
2061                          // nop
2062         break;
2063       default:
2064         assert(i == 0, " ");
2065     }
2066     return;
2067   }
2068   if (UseAddressNop && VM_Version::is_amd()) {
2069     //
2070     // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
2071     //  1: 0x90
2072     //  2: 0x66 0x90
2073     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2074     //  4: 0x0F 0x1F 0x40 0x00
2075     //  5: 0x0F 0x1F 0x44 0x00 0x00
2076     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2077     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2078     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2079     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2080     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2081     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2082 
2083     // The remaining encodings are AMD-specific - use consecutive address nops
2084 
2085     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2086     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
2087     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2088     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2089     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2090     //     Size prefixes (0x66) are added for larger sizes
2091 
2092     while(i >= 22) {
2093       i -= 11;
2094       emit_int8(0x66); // size prefix
2095       emit_int8(0x66); // size prefix
2096       emit_int8(0x66); // size prefix
2097       addr_nop_8();
2098     }
2099     // Generate the first nop for sizes between 12 and 21
2100     switch (i) {
2101       case 21:
2102         i -= 1;
2103         emit_int8(0x66); // size prefix
2104       case 20:
2105       case 19:
2106         i -= 1;
2107         emit_int8(0x66); // size prefix
2108       case 18:
2109       case 17:
2110         i -= 1;
2111         emit_int8(0x66); // size prefix
2112       case 16:
2113       case 15:
2114         i -= 8;
2115         addr_nop_8();
2116         break;
2117       case 14:
2118       case 13:
2119         i -= 7;
2120         addr_nop_7();
2121         break;
2122       case 12:
2123         i -= 6;
2124         emit_int8(0x66); // size prefix
2125         addr_nop_5();
2126         break;
2127       default:
2128         assert(i < 12, " ");
2129     }
2130 
2131     // Generate the second nop for sizes between 1 and 11
2132     switch (i) {
2133       case 11:
2134         emit_int8(0x66); // size prefix
2135       case 10:
2136         emit_int8(0x66); // size prefix
2137       case 9:
2138         emit_int8(0x66); // size prefix
2139       case 8:
2140         addr_nop_8();
2141         break;
2142       case 7:
2143         addr_nop_7();
2144         break;
2145       case 6:
2146         emit_int8(0x66); // size prefix
2147       case 5:
2148         addr_nop_5();
2149         break;
2150       case 4:
2151         addr_nop_4();
2152         break;
2153       case 3:
2154         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
2155         emit_int8(0x66); // size prefix
2156       case 2:
2157         emit_int8(0x66); // size prefix
2158       case 1:
2159         emit_int8((unsigned char)0x90);
2160                          // nop
2161         break;
2162       default:
2163         assert(i == 0, " ");
2164     }
2165     return;
2166   }
2167 
2168   // Using nops with size prefixes "0x66 0x90".
2169   // From AMD Optimization Guide:
2170   //  1: 0x90
2171   //  2: 0x66 0x90
2172   //  3: 0x66 0x66 0x90
2173   //  4: 0x66 0x66 0x66 0x90
2174   //  5: 0x66 0x66 0x90 0x66 0x90
2175   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
2176   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
2177   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
2178   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2179   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
2180   //
2181   while(i > 12) {
2182     i -= 4;
2183     emit_int8(0x66); // size prefix
2184     emit_int8(0x66);
2185     emit_int8(0x66);
2186     emit_int8((unsigned char)0x90);
2187                      // nop
2188   }
2189   // 1 - 12 nops
2190   if(i > 8) {
2191     if(i > 9) {
2192       i -= 1;
2193       emit_int8(0x66);
2194     }
2195     i -= 3;
2196     emit_int8(0x66);
2197     emit_int8(0x66);
2198     emit_int8((unsigned char)0x90);
2199   }
2200   // 1 - 8 nops
2201   if(i > 4) {
2202     if(i > 6) {
2203       i -= 1;
2204       emit_int8(0x66);
2205     }
2206     i -= 3;
2207     emit_int8(0x66);
2208     emit_int8(0x66);
2209     emit_int8((unsigned char)0x90);
2210   }
2211   switch (i) {
2212     case 4:
2213       emit_int8(0x66);
2214     case 3:
2215       emit_int8(0x66);
2216     case 2:
2217       emit_int8(0x66);
2218     case 1:
2219       emit_int8((unsigned char)0x90);
2220       break;
2221     default:
2222       assert(i == 0, " ");
2223   }
2224 }
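// The padding logic above is a greedy decomposition: consume the request in
// the largest vendor-tolerated chunk, then finish with one nop from the
// 1..14 (Intel) or 1..11 (AMD) tables. A sketch of the Intel-path arithmetic
// (illustration only; the chunk contents match the emits above):
#if 0 // illustration only
static int intel_nop_chunks(int i, int* chunks, int max_chunks) {
  int n = 0;
  // each 15-byte chunk is 3 prefixes + addr_nop_8 + 3 prefixes + 0x90
  while (i >= 15 && n < max_chunks) { chunks[n++] = 15; i -= 15; }
  if (i > 0 && n < max_chunks) { chunks[n++] = i; }  // one nop from the table
  return n;
}
#endif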
2225 
2226 void Assembler::notl(Register dst) {
2227   int encode = prefix_and_encode(dst->encoding());
2228   emit_int8((unsigned char)0xF7);
2229   emit_int8((unsigned char)(0xD0 | encode));
2230 }
2231 
2232 void Assembler::orl(Address dst, int32_t imm32) {
2233   InstructionMark im(this);
2234   prefix(dst);
2235   emit_arith_operand(0x81, rcx, dst, imm32);
2236 }
2237 
2238 void Assembler::orl(Register dst, int32_t imm32) {
2239   prefix(dst);
2240   emit_arith(0x81, 0xC8, dst, imm32);
2241 }
2242 
2243 void Assembler::orl(Register dst, Address src) {
2244   InstructionMark im(this);
2245   prefix(src, dst);
2246   emit_int8(0x0B);
2247   emit_operand(dst, src);
2248 }
2249 
2250 void Assembler::orl(Register dst, Register src) {
2251   (void) prefix_and_encode(dst->encoding(), src->encoding());
2252   emit_arith(0x0B, 0xC0, dst, src);
2253 }
2254 
2255 void Assembler::packuswb(XMMRegister dst, Address src) {
2256   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2257   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2258   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2259 }
2260 
2261 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2262   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2263   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2264 }
2265 
2266 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2267   assert(VM_Version::supports_sse4_2(), "");
2268   InstructionMark im(this);
2269   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2270   emit_int8(0x61);
2271   emit_operand(dst, src);
2272   emit_int8(imm8);
2273 }
2274 
2275 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2276   assert(VM_Version::supports_sse4_2(), "");
2277   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2278   emit_int8(0x61);
2279   emit_int8((unsigned char)(0xC0 | encode));
2280   emit_int8(imm8);
2281 }
2282 
2283 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
2284   assert(VM_Version::supports_sse4_1(), "");
2285   InstructionMark im(this);
2286   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2287   emit_int8(0x30);
2288   emit_operand(dst, src);
2289 }
2290 
2291 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
2292   assert(VM_Version::supports_sse4_1(), "");
2293   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2294   emit_int8(0x30);
2295   emit_int8((unsigned char)(0xC0 | encode));
2296 }
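// pmovzxbw widens the low eight bytes of the source into eight zero-extended
// words. A scalar model (illustration only):
#if 0 // illustration only
#include <stdint.h>
static void pmovzxbw_model(uint16_t dst[8], const uint8_t src[8]) {
  for (int i = 0; i < 8; i++) {
    dst[i] = src[i];  // each byte becomes a word with high bits zero
  }
}
#endif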
2297 
2298 // generic
2299 void Assembler::pop(Register dst) {
2300   int encode = prefix_and_encode(dst->encoding());
2301   emit_int8(0x58 | encode);
2302 }
2303 
2304 void Assembler::popcntl(Register dst, Address src) {
2305   assert(VM_Version::supports_popcnt(), "must support");
2306   InstructionMark im(this);
2307   emit_int8((unsigned char)0xF3);
2308   prefix(src, dst);
2309   emit_int8(0x0F);
2310   emit_int8((unsigned char)0xB8);
2311   emit_operand(dst, src);
2312 }
2313 
2314 void Assembler::popcntl(Register dst, Register src) {
2315   assert(VM_Version::supports_popcnt(), "must support");
2316   emit_int8((unsigned char)0xF3);
2317   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2318   emit_int8(0x0F);
2319   emit_int8((unsigned char)0xB8);
2320   emit_int8((unsigned char)(0xC0 | encode));
2321 }
2322 
2323 void Assembler::popf() {
2324   emit_int8((unsigned char)0x9D);
2325 }
2326 
2327 #ifndef _LP64 // no 32bit push/pop on amd64
2328 void Assembler::popl(Address dst) {
2329   // NOTE: on 64-bit this encoding would adjust the stack by 8 bytes
2330   InstructionMark im(this);
2331   prefix(dst);
2332   emit_int8((unsigned char)0x8F);
2333   emit_operand(rax, dst);
2334 }
2335 #endif
2336 
2337 void Assembler::prefetch_prefix(Address src) {
2338   prefix(src);
2339   emit_int8(0x0F);
2340 }
2341 
2342 void Assembler::prefetchnta(Address src) {
2343   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2344   InstructionMark im(this);
2345   prefetch_prefix(src);
2346   emit_int8(0x18);
2347   emit_operand(rax, src); // 0, src
2348 }
2349 
2350 void Assembler::prefetchr(Address src) {
2351   assert(VM_Version::supports_3dnow_prefetch(), "must support");
2352   InstructionMark im(this);
2353   prefetch_prefix(src);
2354   emit_int8(0x0D);
2355   emit_operand(rax, src); // 0, src
2356 }
2357 
2358 void Assembler::prefetcht0(Address src) {
2359   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2360   InstructionMark im(this);
2361   prefetch_prefix(src);
2362   emit_int8(0x18);
2363   emit_operand(rcx, src); // 1, src
2364 }
2365 
2366 void Assembler::prefetcht1(Address src) {
2367   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2368   InstructionMark im(this);
2369   prefetch_prefix(src);
2370   emit_int8(0x18);
2371   emit_operand(rdx, src); // 2, src
2372 }
2373 
2374 void Assembler::prefetcht2(Address src) {
2375   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2376   InstructionMark im(this);
2377   prefetch_prefix(src);
2378   emit_int8(0x18);
2379   emit_operand(rbx, src); // 3, src
2380 }
2381 
2382 void Assembler::prefetchw(Address src) {
2383   assert(VM_Version::supports_3dnow_prefetch(), "must support");
2384   InstructionMark im(this);
2385   prefetch_prefix(src);
2386   emit_int8(0x0D);
2387   emit_operand(rcx, src); // 1, src
2388 }
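// In the prefetch emitters above the ModRM reg field is not a register at
// all: for opcode 0x18 it selects the locality hint, which is why rax, rcx,
// rdx and rbx appear purely as encodings of 0..3. A sketch of the mapping
// (illustration only):
#if 0 // illustration only
enum PrefetchHint { hint_nta = 0, hint_t0 = 1, hint_t1 = 2, hint_t2 = 3 };
static int prefetch_modrm_reg(PrefetchHint h) {
  return (int)h;  // emitted above via emit_operand(rax/rcx/rdx/rbx, src)
}
#endif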
2389 
2390 void Assembler::prefix(Prefix p) {
2391   emit_int8(p);
2392 }
2393 
2394 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
2395   assert(VM_Version::supports_ssse3(), "");
2396   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2397   emit_int8(0x00);
2398   emit_int8((unsigned char)(0xC0 | encode));
2399 }
2400 
2401 void Assembler::pshufb(XMMRegister dst, Address src) {
2402   assert(VM_Version::supports_ssse3(), "");
2403   InstructionMark im(this);
2404   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2405   emit_int8(0x00);
2406   emit_operand(dst, src);
2407 }
2408 
2409 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2410   assert(isByte(mode), "invalid value");
2411   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2412   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
2413   emit_int8(mode & 0xFF);
2415 }
2416 
2417 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2418   assert(isByte(mode), "invalid value");
2419   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2420   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2421   InstructionMark im(this);
2422   simd_prefix(dst, src, VEX_SIMD_66);
2423   emit_int8(0x70);
2424   emit_operand(dst, src);
2425   emit_int8(mode & 0xFF);
2426 }
2427 
2428 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2429   assert(isByte(mode), "invalid value");
2430   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2431   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
2432   emit_int8(mode & 0xFF);
2433 }
2434 
2435 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2436   assert(isByte(mode), "invalid value");
2437   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2438   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2439   InstructionMark im(this);
2440   simd_prefix(dst, src, VEX_SIMD_F2);
2441   emit_int8(0x70);
2442   emit_operand(dst, src);
2443   emit_int8(mode & 0xFF);
2444 }
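// The mode byte of pshufd/pshuflw packs four 2-bit source-lane indices:
// result element i comes from source lane (mode >> (2*i)) & 3 (pshufd over
// all four dwords, pshuflw over the low four words). A standalone model of
// pshufd (illustration only):
#if 0 // illustration only
#include <stdint.h>
static void pshufd_model(uint32_t dst[4], const uint32_t src[4], int mode) {
  for (int i = 0; i < 4; i++) {
    dst[i] = src[(mode >> (2 * i)) & 3];  // 2-bit selector per result lane
  }
}
#endif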
2445 
2446 void Assembler::psrldq(XMMRegister dst, int shift) {
2447   // Shift the 128-bit value in an xmm register right by 'shift' bytes.
2448   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2449   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
2450   emit_int8(0x73);
2451   emit_int8((unsigned char)(0xC0 | encode));
2452   emit_int8(shift);
2453 }
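// A byte-array model of that shift (illustration only): psrldq moves the
// whole 128-bit value toward the least significant end, zero-filling the top.
#if 0 // illustration only
#include <stdint.h>
#include <string.h>
static void psrldq_model(uint8_t xmm[16], int shift) {
  if (shift > 15) { memset(xmm, 0, 16); return; }
  memmove(xmm, xmm + shift, 16 - shift);  // byte 0 is least significant
  memset(xmm + (16 - shift), 0, shift);   // vacated high bytes become zero
}
#endif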
2454 
2455 void Assembler::ptest(XMMRegister dst, Address src) {
2456   assert(VM_Version::supports_sse4_1(), "");
2457   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2458   InstructionMark im(this);
2459   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2460   emit_int8(0x17);
2461   emit_operand(dst, src);
2462 }
2463 
2464 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2465   assert(VM_Version::supports_sse4_1(), "");
2466   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2467   emit_int8(0x17);
2468   emit_int8((unsigned char)(0xC0 | encode));
2469 }
2470 
2471 void Assembler::punpcklbw(XMMRegister dst, Address src) {
2472   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2473   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2474   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2475 }
2476 
2477 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2478   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2479   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2480 }
2481 
2482 void Assembler::punpckldq(XMMRegister dst, Address src) {
2483   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2484   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2485   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2486 }
2487 
2488 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
2489   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2490   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2491 }
2492 
2493 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
2494   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2495   emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
2496 }
2497 
2498 void Assembler::push(int32_t imm32) {
2499   // In 64-bit mode we push 64 bits onto the stack but still take
2500   // only a 32-bit immediate
2501   emit_int8(0x68);
2502   emit_long(imm32);
2503 }
2504 
2505 void Assembler::push(Register src) {
2506   int encode = prefix_and_encode(src->encoding());
2507 
2508   emit_int8(0x50 | encode);
2509 }
2510 
2511 void Assembler::pushf() {
2512   emit_int8((unsigned char)0x9C);
2513 }
2514 
2515 #ifndef _LP64 // no 32bit push/pop on amd64
2516 void Assembler::pushl(Address src) {
2517   // Note: on 64-bit this encoding would push 64 bits
2518   InstructionMark im(this);
2519   prefix(src);
2520   emit_int8((unsigned char)0xFF);
2521   emit_operand(rsi, src);
2522 }
2523 #endif
2524 
2525 void Assembler::rcll(Register dst, int imm8) {
2526   assert(isShiftCount(imm8), "illegal shift count");
2527   int encode = prefix_and_encode(dst->encoding());
2528   if (imm8 == 1) {
2529     emit_int8((unsigned char)0xD1);
2530     emit_int8((unsigned char)(0xD0 | encode));
2531   } else {
2532     emit_int8((unsigned char)0xC1);
2533     emit_int8((unsigned char)(0xD0 | encode));
2534     emit_int8(imm8);
2535   }
2536 }
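// The shift/rotate-by-immediate emitters in this file share one pattern: a
// count of 1 gets the shorter 0xD1 /r form, anything else 0xC1 /r ib. The
// size difference is one byte (illustration only, REX prefixes ignored):
#if 0 // illustration only
static int shift_imm_insn_bytes(int imm8) {
  return (imm8 == 1) ? 2 : 3;  // D1 /r  vs  C1 /r ib
}
#endif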
2537 
2538 // Copies rcx pointer-sized words from [esi] to [edi].
2539 // generic
2540 void Assembler::rep_mov() {
2541   emit_int8((unsigned char)0xF3);
2542   // MOVSQ
2543   LP64_ONLY(prefix(REX_W));
2544   emit_int8((unsigned char)0xA5);
2545 }
2546 
2547 // Stores the value in rax into rcx pointer-sized words starting at [edi].
2548 // generic
2549 void Assembler::rep_set() { // rep_set
2550   emit_int8((unsigned char)0xF3);
2551   // STOSQ
2552   LP64_ONLY(prefix(REX_W));
2553   emit_int8((unsigned char)0xAB);
2554 }
2555 
2556 // Scans rcx pointer-sized words at [edi] for an occurrence of rax.
2557 // generic
2558 void Assembler::repne_scan() { // repne_scan
2559   emit_int8((unsigned char)0xF2);
2560   // SCASQ
2561   LP64_ONLY(prefix(REX_W));
2562   emit_int8((unsigned char)0xAF);
2563 }
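// A C model of the string ops above (illustration only): with the direction
// flag clear, rep_mov() behaves like this forward word copy; rep_set() and
// repne_scan() are the analogous store and scan loops.
#if 0 // illustration only
#include <stddef.h>
#include <stdint.h>
static void rep_mov_model(uintptr_t* edi, const uintptr_t* esi, size_t rcx) {
  while (rcx-- > 0) {
    *edi++ = *esi++;  // REP MOVSQ copies rcx pointer-sized words
  }
}
#endif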
2564 
2565 #ifdef _LP64
2566 // Scans rcx 4-byte words at [edi] for an occurrence of rax.
2567 // generic
2568 void Assembler::repne_scanl() { // repne_scan
2569   emit_int8((unsigned char)0xF2);
2570   // SCASL
2571   emit_int8((unsigned char)0xAF);
2572 }
2573 #endif
2574 
2575 void Assembler::ret(int imm16) {
2576   if (imm16 == 0) {
2577     emit_int8((unsigned char)0xC3);
2578   } else {
2579     emit_int8((unsigned char)0xC2);
2580     emit_int16(imm16);
2581   }
2582 }
2583 
2584 void Assembler::sahf() {
2585 #ifdef _LP64
2586   // Not supported in 64bit mode
2587   ShouldNotReachHere();
2588 #endif
2589   emit_int8((unsigned char)0x9E);
2590 }
2591 
2592 void Assembler::sarl(Register dst, int imm8) {
2593   int encode = prefix_and_encode(dst->encoding());
2594   assert(isShiftCount(imm8), "illegal shift count");
2595   if (imm8 == 1) {
2596     emit_int8((unsigned char)0xD1);
2597     emit_int8((unsigned char)(0xF8 | encode));
2598   } else {
2599     emit_int8((unsigned char)0xC1);
2600     emit_int8((unsigned char)(0xF8 | encode));
2601     emit_int8(imm8);
2602   }
2603 }
2604 
2605 void Assembler::sarl(Register dst) {
2606   int encode = prefix_and_encode(dst->encoding());
2607   emit_int8((unsigned char)0xD3);
2608   emit_int8((unsigned char)(0xF8 | encode));
2609 }
2610 
2611 void Assembler::sbbl(Address dst, int32_t imm32) {
2612   InstructionMark im(this);
2613   prefix(dst);
2614   emit_arith_operand(0x81, rbx, dst, imm32);
2615 }
2616 
2617 void Assembler::sbbl(Register dst, int32_t imm32) {
2618   prefix(dst);
2619   emit_arith(0x81, 0xD8, dst, imm32);
2620 }
2621 
2622 
2623 void Assembler::sbbl(Register dst, Address src) {
2624   InstructionMark im(this);
2625   prefix(src, dst);
2626   emit_int8(0x1B);
2627   emit_operand(dst, src);
2628 }
2629 
2630 void Assembler::sbbl(Register dst, Register src) {
2631   (void) prefix_and_encode(dst->encoding(), src->encoding());
2632   emit_arith(0x1B, 0xC0, dst, src);
2633 }
2634 
2635 void Assembler::setb(Condition cc, Register dst) {
2636   assert(0 <= cc && cc < 16, "illegal cc");
2637   int encode = prefix_and_encode(dst->encoding(), true);
2638   emit_int8(0x0F);
2639   emit_int8((unsigned char)(0x90 | cc));
2640   emit_int8((unsigned char)(0xC0 | encode));
2641 }
2642 
2643 void Assembler::shll(Register dst, int imm8) {
2644   assert(isShiftCount(imm8), "illegal shift count");
2645   int encode = prefix_and_encode(dst->encoding());
2646   if (imm8 == 1) {
2647     emit_int8((unsigned char)0xD1);
2648     emit_int8((unsigned char)(0xE0 | encode));
2649   } else {
2650     emit_int8((unsigned char)0xC1);
2651     emit_int8((unsigned char)(0xE0 | encode));
2652     emit_int8(imm8);
2653   }
2654 }
2655 
2656 void Assembler::shll(Register dst) {
2657   int encode = prefix_and_encode(dst->encoding());
2658   emit_int8((unsigned char)0xD3);
2659   emit_int8((unsigned char)(0xE0 | encode));
2660 }
2661 
2662 void Assembler::shrl(Register dst, int imm8) {
2663   assert(isShiftCount(imm8), "illegal shift count");
2664   int encode = prefix_and_encode(dst->encoding());
2665   emit_int8((unsigned char)0xC1);
2666   emit_int8((unsigned char)(0xE8 | encode));
2667   emit_int8(imm8);
2668 }
2669 
2670 void Assembler::shrl(Register dst) {
2671   int encode = prefix_and_encode(dst->encoding());
2672   emit_int8((unsigned char)0xD3);
2673   emit_int8((unsigned char)(0xE8 | encode));
2674 }
2675 
2676 // copies a single word from [esi] to [edi]
2677 void Assembler::smovl() {
2678   emit_int8((unsigned char)0xA5);
2679 }
2680 
2681 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2682   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2683   emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
2684 }
2685 
2686 void Assembler::sqrtsd(XMMRegister dst, Address src) {
2687   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2688   emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
2689 }
2690 
2691 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
2692   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2693   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2694 }
2695 
2696 void Assembler::std() {
2697   emit_int8((unsigned char)0xFD);
2698 }
2699 
2700 void Assembler::sqrtss(XMMRegister dst, Address src) {
2701   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2702   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2703 }
2704 
2705 void Assembler::stmxcsr( Address dst) {
2706   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2707   InstructionMark im(this);
2708   prefix(dst);
2709   emit_int8(0x0F);
2710   emit_int8((unsigned char)0xAE);
2711   emit_operand(as_Register(3), dst);
2712 }
2713 
2714 void Assembler::subl(Address dst, int32_t imm32) {
2715   InstructionMark im(this);
2716   prefix(dst);
2717   emit_arith_operand(0x81, rbp, dst, imm32);
2718 }
2719 
2720 void Assembler::subl(Address dst, Register src) {
2721   InstructionMark im(this);
2722   prefix(dst, src);
2723   emit_int8(0x29);
2724   emit_operand(src, dst);
2725 }
2726 
2727 void Assembler::subl(Register dst, int32_t imm32) {
2728   prefix(dst);
2729   emit_arith(0x81, 0xE8, dst, imm32);
2730 }
2731 
2732 // Force generation of a 4-byte immediate value even if it fits into 8 bits
2733 void Assembler::subl_imm32(Register dst, int32_t imm32) {
2734   prefix(dst);
2735   emit_arith_imm32(0x81, 0xE8, dst, imm32);
2736 }
2737 
2738 void Assembler::subl(Register dst, Address src) {
2739   InstructionMark im(this);
2740   prefix(src, dst);
2741   emit_int8(0x2B);
2742   emit_operand(dst, src);
2743 }
2744 
2745 void Assembler::subl(Register dst, Register src) {
2746   (void) prefix_and_encode(dst->encoding(), src->encoding());
2747   emit_arith(0x2B, 0xC0, dst, src);
2748 }
2749 
2750 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2751   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2752   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
2753 }
2754 
2755 void Assembler::subsd(XMMRegister dst, Address src) {
2756   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2757   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
2758 }
2759 
2760 void Assembler::subss(XMMRegister dst, XMMRegister src) {
2761   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2762   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2763 }
2764 
2765 void Assembler::subss(XMMRegister dst, Address src) {
2766   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2767   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2768 }
2769 
2770 void Assembler::testb(Register dst, int imm8) {
2771   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2772   (void) prefix_and_encode(dst->encoding(), true);
2773   emit_arith_b(0xF6, 0xC0, dst, imm8);
2774 }
2775 
2776 void Assembler::testl(Register dst, int32_t imm32) {
2777   // not using emit_arith because test
2778   // doesn't support sign-extension of
2779   // 8-bit operands
2780   int encode = dst->encoding();
2781   if (encode == 0) {
2782     emit_int8((unsigned char)0xA9);
2783   } else {
2784     encode = prefix_and_encode(encode);
2785     emit_int8((unsigned char)0xF7);
2786     emit_int8((unsigned char)(0xC0 | encode));
2787   }
2788   emit_long(imm32);
2789 }
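// test has no sign-extended imm8 form, so a full 32-bit immediate is always
// emitted; the only size win is the dedicated rax encoding (0xA9) taken
// above when encode == 0. The byte counts, REX ignored (illustration only):
#if 0 // illustration only
static int testl_imm_insn_bytes(int reg_encoding) {
  return (reg_encoding == 0) ? 5 : 6;  // A9 id  vs  F7 /0 id
}
#endif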
2790 
2791 void Assembler::testl(Register dst, Register src) {
2792   (void) prefix_and_encode(dst->encoding(), src->encoding());
2793   emit_arith(0x85, 0xC0, dst, src);
2794 }
2795 
2796 void Assembler::testl(Register dst, Address  src) {
2797   InstructionMark im(this);
2798   prefix(src, dst);
2799   emit_int8((unsigned char)0x85);
2800   emit_operand(dst, src);
2801 }
2802 
2803 void Assembler::ucomisd(XMMRegister dst, Address src) {
2804   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2805   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
2806 }
2807 
2808 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2809   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2810   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
2811 }
2812 
2813 void Assembler::ucomiss(XMMRegister dst, Address src) {
2814   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2815   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
2816 }
2817 
2818 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
2819   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2820   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
2821 }
2822 
2823 
2824 void Assembler::xaddl(Address dst, Register src) {
2825   InstructionMark im(this);
2826   prefix(dst, src);
2827   emit_int8(0x0F);
2828   emit_int8((unsigned char)0xC1);
2829   emit_operand(src, dst);
2830 }
2831 
2832 void Assembler::xchgl(Register dst, Address src) { // xchg
2833   InstructionMark im(this);
2834   prefix(src, dst);
2835   emit_int8((unsigned char)0x87);
2836   emit_operand(dst, src);
2837 }
2838 
2839 void Assembler::xchgl(Register dst, Register src) {
2840   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2841   emit_int8((unsigned char)0x87);
2842   emit_int8((unsigned char)(0xC0 | encode));
2843 }
2844 
2845 void Assembler::xgetbv() {
2846   emit_int8(0x0F);
2847   emit_int8(0x01);
2848   emit_int8((unsigned char)0xD0);
2849 }
2850 
2851 void Assembler::xorl(Register dst, int32_t imm32) {
2852   prefix(dst);
2853   emit_arith(0x81, 0xF0, dst, imm32);
2854 }
2855 
2856 void Assembler::xorl(Register dst, Address src) {
2857   InstructionMark im(this);
2858   prefix(src, dst);
2859   emit_int8(0x33);
2860   emit_operand(dst, src);
2861 }
2862 
2863 void Assembler::xorl(Register dst, Register src) {
2864   (void) prefix_and_encode(dst->encoding(), src->encoding());
2865   emit_arith(0x33, 0xC0, dst, src);
2866 }
2867 
2868 
2869 // AVX 3-operand scalar floating-point arithmetic instructions
2870 
2871 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
2872   assert(VM_Version::supports_avx(), "");
2873   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2874 }
2875 
2876 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2877   assert(VM_Version::supports_avx(), "");
2878   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2879 }
2880 
2881 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
2882   assert(VM_Version::supports_avx(), "");
2883   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2884 }
2885 
2886 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2887   assert(VM_Version::supports_avx(), "");
2888   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2889 }
2890 
2891 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
2892   assert(VM_Version::supports_avx(), "");
2893   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2894 }
2895 
2896 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2897   assert(VM_Version::supports_avx(), "");
2898   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2899 }
2900 
2901 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
2902   assert(VM_Version::supports_avx(), "");
2903   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2904 }
2905 
2906 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2907   assert(VM_Version::supports_avx(), "");
2908   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2909 }
2910 
2911 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
2912   assert(VM_Version::supports_avx(), "");
2913   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2914 }
2915 
2916 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2917   assert(VM_Version::supports_avx(), "");
2918   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2919 }
2920 
2921 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
2922   assert(VM_Version::supports_avx(), "");
2923   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2924 }
2925 
2926 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2927   assert(VM_Version::supports_avx(), "");
2928   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2929 }
2930 
2931 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
2932   assert(VM_Version::supports_avx(), "");
2933   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2934 }
2935 
2936 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2937   assert(VM_Version::supports_avx(), "");
2938   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2939 }
2940 
2941 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
2942   assert(VM_Version::supports_avx(), "");
2943   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2944 }
2945 
2946 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2947   assert(VM_Version::supports_avx(), "");
2948   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2949 }
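// Unlike the destructive two-operand SSE forms earlier in this file, these
// VEX-encoded forms are non-destructive three-operand ops: dst receives
// nds OP src and both sources survive. A scalar model of vaddsd
// (illustration only; the upper bits of dst are carried over from nds):
#if 0 // illustration only
static double vaddsd_model(double nds, double src) {
  return nds + src;  // dst = nds + src; neither input register is clobbered
}
#endif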
2950 
2951 //====================VECTOR ARITHMETIC=====================================
2952 
2953 // Floating-point vector arithmetic
2954 
2955 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
2956   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2957   emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
2958 }
2959 
2960 void Assembler::addps(XMMRegister dst, XMMRegister src) {
2961   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2962   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
2963 }
2964 
2965 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2966   assert(VM_Version::supports_avx(), "");
2967   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
2968 }
2969 
2970 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2971   assert(VM_Version::supports_avx(), "");
2972   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
2973 }
2974 
2975 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
2976   assert(VM_Version::supports_avx(), "");
2977   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
2978 }
2979 
2980 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
2981   assert(VM_Version::supports_avx(), "");
2982   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
2983 }
2984 
2985 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
2986   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2987   emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
2988 }
2989 
2990 void Assembler::subps(XMMRegister dst, XMMRegister src) {
2991   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2992   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
2993 }
2994 
2995 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2996   assert(VM_Version::supports_avx(), "");
2997   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
2998 }
2999 
3000 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3001   assert(VM_Version::supports_avx(), "");
3002   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
3003 }
3004 
3005 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3006   assert(VM_Version::supports_avx(), "");
3007   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
3008 }
3009 
3010 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3011   assert(VM_Version::supports_avx(), "");
3012   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
3013 }
3014 
3015 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
3016   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3017   emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
3018 }
3019 
3020 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
3021   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3022   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
3023 }
3024 
3025 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3026   assert(VM_Version::supports_avx(), "");
3027   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
3028 }
3029 
3030 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3031   assert(VM_Version::supports_avx(), "");
3032   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3033 }
3034 
3035 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3036   assert(VM_Version::supports_avx(), "");
3037   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
3038 }
3039 
3040 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3041   assert(VM_Version::supports_avx(), "");
3042   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3043 }
3044 
3045 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
3046   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3047   emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
3048 }
3049 
3050 void Assembler::divps(XMMRegister dst, XMMRegister src) {
3051   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3052   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
3053 }
3054 
3055 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3056   assert(VM_Version::supports_avx(), "");
3057   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
3058 }
3059 
3060 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3061   assert(VM_Version::supports_avx(), "");
3062   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3063 }
3064 
3065 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3066   assert(VM_Version::supports_avx(), "");
3067   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
3068 }
3069 
3070 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3071   assert(VM_Version::supports_avx(), "");
3072   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3073 }
3074 
3075 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
3076   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3077   emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3078 }
3079 
3080 void Assembler::andps(XMMRegister dst, XMMRegister src) {
3081   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3082   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3083 }
3084 
3085 void Assembler::andps(XMMRegister dst, Address src) {
3086   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3087   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3088 }
3089 
3090 void Assembler::andpd(XMMRegister dst, Address src) {
3091   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3092   emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3093 }
3094 
3095 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3096   assert(VM_Version::supports_avx(), "");
3097   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
3098 }
3099 
3100 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3101   assert(VM_Version::supports_avx(), "");
3102   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
3103 }
3104 
3105 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3106   assert(VM_Version::supports_avx(), "");
3107   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
3108 }
3109 
3110 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3111   assert(VM_Version::supports_avx(), "");
3112   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
3113 }
3114 
3115 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
3116   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3117   emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3118 }
3119 
3120 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
3121   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3122   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3123 }
3124 
3125 void Assembler::xorpd(XMMRegister dst, Address src) {
3126   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3127   emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3128 }
3129 
3130 void Assembler::xorps(XMMRegister dst, Address src) {
3131   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3132   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3133 }
3134 
3135 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3136   assert(VM_Version::supports_avx(), "");
3137   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
3138 }
3139 
3140 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3141   assert(VM_Version::supports_avx(), "");
3142   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
3143 }
3144 
3145 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3146   assert(VM_Version::supports_avx(), "");
3147   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
3148 }
3149 
3150 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3151   assert(VM_Version::supports_avx(), "");
3152   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
3153 }
3154 
3155 
3156 // Integer vector arithmetic
3157 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
3158   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3159   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
3160 }
3161 
3162 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
3163   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3164   emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
3165 }
3166 
3167 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
3168   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3169   emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
3170 }
3171 
3172 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
3173   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3174   emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
3175 }
3176 
3177 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3178   assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3179   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
3180 }
3181 
3182 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3183   assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3184   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
3185 }
3186 
3187 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3188   assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3189   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
3190 }
3191 
3192 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3193   assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3194   emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
3195 }
3196 
3197 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3198   assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3199   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
3200 }
3201 
3202 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3203   assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3204   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
3205 }
3206 
3207 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3208   assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
3209   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
3210 }
3211 
3212 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3214   emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
3215 }
3216 
3217 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
3218   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3219   emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
3220 }
3221 
3222 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
3223   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3224   emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
3225 }
3226 
3227 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
3228   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3229   emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
3230 }
3231 
3232 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
3233   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3234   emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
3235 }
3236 
3237 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3239   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
3240 }
3241 
3242 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3244   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
3245 }
3246 
3247 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3249   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
3250 }
3251 
3252 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3254   emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
3255 }
3256 
3257 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3259   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
3260 }
3261 
3262 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3264   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
3265 }
3266 
3267 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3269   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
3270 }
3271 
3272 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3274   emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
3275 }
3276 
3277 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
3278   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3279   emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
3280 }
3281 
3282 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
3283   assert(VM_Version::supports_sse4_1(), "");
3284   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
3285   emit_int8(0x40);
3286   emit_int8((unsigned char)(0xC0 | encode));
3287 }
3288 
3289 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3291   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
3292 }
3293 
3294 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3296   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
3297   emit_int8(0x40);
3298   emit_int8((unsigned char)(0xC0 | encode));
3299 }
3300 
3301 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3303   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
3304 }
3305 
3306 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3308   InstructionMark im(this);
3309   int dst_enc = dst->encoding();
3310   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
3311   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
3312   emit_int8(0x40);
3313   emit_operand(dst, src);
3314 }
3315 
3316 // Shift packed integers left by specified number of bits.
3317 void Assembler::psllw(XMMRegister dst, int shift) {
3318   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3319   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3320   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3321   emit_int8(0x71);
3322   emit_int8((unsigned char)(0xC0 | encode));
3323   emit_int8(shift & 0xFF);
3324 }
3325 
3326 void Assembler::pslld(XMMRegister dst, int shift) {
3327   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3328   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3329   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3330   emit_int8(0x72);
3331   emit_int8((unsigned char)(0xC0 | encode));
3332   emit_int8(shift & 0xFF);
3333 }
3334 
3335 void Assembler::psllq(XMMRegister dst, int shift) {
3336   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3337   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3338   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3339   emit_int8(0x73);
3340   emit_int8((unsigned char)(0xC0 | encode));
3341   emit_int8(shift & 0xFF);
3342 }
3343 
3344 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
3345   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3346   emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
3347 }
3348 
3349 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
3350   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3351   emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
3352 }
3353 
3354 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
3355   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3356   emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
3357 }
3358 
3359 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3361   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3362   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
3363   emit_int8(shift & 0xFF);
3364 }
3365 
3366 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3368   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3369   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
3370   emit_int8(shift & 0xFF);
3371 }
3372 
3373 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3375   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3376   emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
3377   emit_int8(shift & 0xFF);
3378 }
3379 
3380 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3382   emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
3383 }
3384 
3385 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3387   emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
3388 }
3389 
3390 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3392   emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
3393 }
3394 
3395 // Shift packed integers logically right by specified number of bits.
3396 void Assembler::psrlw(XMMRegister dst, int shift) {
3397   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3398   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
3399   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3400   emit_int8(0x71);
3401   emit_int8((unsigned char)(0xC0 | encode));
3402   emit_int8(shift & 0xFF);
3403 }
3404 
3405 void Assembler::psrld(XMMRegister dst, int shift) {
3406   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3407   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
3408   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3409   emit_int8(0x72);
3410   emit_int8((unsigned char)(0xC0 | encode));
3411   emit_int8(shift & 0xFF);
3412 }
3413 
3414 void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse this with the psrldq SSE2 instruction, which
  // shifts the whole 128-bit xmm value right by a number of bytes.
3417   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3418   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3419   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3420   emit_int8(0x73);
3421   emit_int8((unsigned char)(0xC0 | encode));
3422   emit_int8(shift & 0xFF);
3423 }
3424 
3425 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
3426   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3427   emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
3428 }
3429 
3430 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
3431   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3432   emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
3433 }
3434 
3435 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
3436   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3437   emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
3438 }
3439 
3440 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
3443   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
3444   emit_int8(shift & 0xFF);
3445 }
3446 
3447 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
3450   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
3451   emit_int8(shift & 0xFF);
3452 }
3453 
3454 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3456   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3457   emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
3458   emit_int8(shift & 0xFF);
3459 }
3460 
3461 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3463   emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
3464 }
3465 
3466 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3468   emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
3469 }
3470 
3471 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3473   emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
3474 }
3475 
3476 // Shift packed integers arithmetically right by specified number of bits.
3477 void Assembler::psraw(XMMRegister dst, int shift) {
3478   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3479   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3480   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
3481   emit_int8(0x71);
3482   emit_int8((unsigned char)(0xC0 | encode));
3483   emit_int8(shift & 0xFF);
3484 }
3485 
3486 void Assembler::psrad(XMMRegister dst, int shift) {
3487   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3488   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
3489   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
3490   emit_int8(0x72);
3491   emit_int8((unsigned char)(0xC0 | encode));
3492   emit_int8(shift & 0xFF);
3493 }
3494 
3495 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
3496   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3497   emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
3498 }
3499 
3500 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
3501   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3502   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
3503 }
3504 
3505 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3507   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3508   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
3509   emit_int8(shift & 0xFF);
3510 }
3511 
3512 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
3515   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
3516   emit_int8(shift & 0xFF);
3517 }
3518 
3519 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3521   emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
3522 }
3523 
3524 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3526   emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
3527 }
3528 
3529 
3530 // AND packed integers
3531 void Assembler::pand(XMMRegister dst, XMMRegister src) {
3532   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3533   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
3534 }
3535 
3536 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3538   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
3539 }
3540 
3541 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3543   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
3544 }
3545 
3546 void Assembler::por(XMMRegister dst, XMMRegister src) {
3547   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3548   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
3549 }
3550 
3551 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3553   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
3554 }
3555 
3556 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3558   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
3559 }
3560 
3561 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
3562   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3563   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
3564 }
3565 
3566 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3568   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
3569 }
3570 
3571 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256 bit integer vectors require AVX2");
3573   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
3574 }
3575 
3576 
3577 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3578   assert(VM_Version::supports_avx(), "");
3579   bool vector256 = true;
3580   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3581   emit_int8(0x18);
3582   emit_int8((unsigned char)(0xC0 | encode));
3583   // 0x00 - insert into lower 128 bits
3584   // 0x01 - insert into upper 128 bits
3585   emit_int8(0x01);
3586 }
3587 
3588 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
3589   assert(VM_Version::supports_avx(), "");
3590   InstructionMark im(this);
3591   bool vector256 = true;
3592   assert(dst != xnoreg, "sanity");
3593   int dst_enc = dst->encoding();
3594   // swap src<->dst for encoding
3595   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3596   emit_int8(0x18);
3597   emit_operand(dst, src);
3598   // 0x01 - insert into upper 128 bits
3599   emit_int8(0x01);
3600 }
3601 
3602 void Assembler::vextractf128h(Address dst, XMMRegister src) {
3603   assert(VM_Version::supports_avx(), "");
3604   InstructionMark im(this);
3605   bool vector256 = true;
3606   assert(src != xnoreg, "sanity");
3607   int src_enc = src->encoding();
3608   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3609   emit_int8(0x19);
3610   emit_operand(src, dst);
3611   // 0x01 - extract from upper 128 bits
3612   emit_int8(0x01);
3613 }
3614 
3615 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3616   assert(VM_Version::supports_avx2(), "");
3617   bool vector256 = true;
3618   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3619   emit_int8(0x38);
3620   emit_int8((unsigned char)(0xC0 | encode));
3621   // 0x00 - insert into lower 128 bits
3622   // 0x01 - insert into upper 128 bits
3623   emit_int8(0x01);
3624 }
3625 
3626 void Assembler::vinserti128h(XMMRegister dst, Address src) {
3627   assert(VM_Version::supports_avx2(), "");
3628   InstructionMark im(this);
3629   bool vector256 = true;
3630   assert(dst != xnoreg, "sanity");
3631   int dst_enc = dst->encoding();
3632   // swap src<->dst for encoding
3633   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3634   emit_int8(0x38);
3635   emit_operand(dst, src);
3636   // 0x01 - insert into upper 128 bits
3637   emit_int8(0x01);
3638 }
3639 
3640 void Assembler::vextracti128h(Address dst, XMMRegister src) {
3641   assert(VM_Version::supports_avx2(), "");
3642   InstructionMark im(this);
3643   bool vector256 = true;
3644   assert(src != xnoreg, "sanity");
3645   int src_enc = src->encoding();
3646   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3647   emit_int8(0x39);
3648   emit_operand(src, dst);
3649   // 0x01 - extract from upper 128 bits
3650   emit_int8(0x01);
3651 }
3652 
// Duplicate the 4-byte integer data from src into 8 locations in dst
3654 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
3655   assert(VM_Version::supports_avx2(), "");
3656   bool vector256 = true;
3657   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
3658   emit_int8(0x58);
3659   emit_int8((unsigned char)(0xC0 | encode));
3660 }
3661 
3662 void Assembler::vzeroupper() {
3663   assert(VM_Version::supports_avx(), "");
3664   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
3665   emit_int8(0x77);
3666 }
3667 
3668 
3669 #ifndef _LP64
3670 // 32bit only pieces of the assembler
3671 
3672 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
3673   // NO PREFIX AS NEVER 64BIT
3674   InstructionMark im(this);
3675   emit_int8((unsigned char)0x81);
3676   emit_int8((unsigned char)(0xF8 | src1->encoding()));
3677   emit_data(imm32, rspec, 0);
3678 }
3679 
3680 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
3682   InstructionMark im(this);
3683   emit_int8((unsigned char)0x81);
3684   emit_operand(rdi, src1);
3685   emit_data(imm32, rspec, 0);
3686 }
3687 
// The 64-bit cmpxchg (cmpxchg8b, 32bit platform only) compares the value at adr
// with the contents of rdx:rax, and stores rcx:rbx into adr if they are equal;
// otherwise, the value at adr is loaded into rdx:rax. The ZF is set if the
// compared values were equal, and cleared otherwise.
3691 void Assembler::cmpxchg8(Address adr) {
3692   InstructionMark im(this);
3693   emit_int8(0x0F);
3694   emit_int8((unsigned char)0xC7);
3695   emit_operand(rcx, adr);
3696 }
3697 
3698 void Assembler::decl(Register dst) {
3699   // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_int8(0x48 | dst->encoding());
3701 }
3702 
3703 #endif // _LP64
3704 
// 64bit code typically doesn't use the x87, but needs it for the trig functions
3706 
3707 void Assembler::fabs() {
3708   emit_int8((unsigned char)0xD9);
3709   emit_int8((unsigned char)0xE1);
3710 }
3711 
3712 void Assembler::fadd(int i) {
3713   emit_farith(0xD8, 0xC0, i);
3714 }
3715 
3716 void Assembler::fadd_d(Address src) {
3717   InstructionMark im(this);
3718   emit_int8((unsigned char)0xDC);
3719   emit_operand32(rax, src);
3720 }
3721 
3722 void Assembler::fadd_s(Address src) {
3723   InstructionMark im(this);
3724   emit_int8((unsigned char)0xD8);
3725   emit_operand32(rax, src);
3726 }
3727 
3728 void Assembler::fadda(int i) {
3729   emit_farith(0xDC, 0xC0, i);
3730 }
3731 
3732 void Assembler::faddp(int i) {
3733   emit_farith(0xDE, 0xC0, i);
3734 }
3735 
3736 void Assembler::fchs() {
3737   emit_int8((unsigned char)0xD9);
3738   emit_int8((unsigned char)0xE0);
3739 }
3740 
3741 void Assembler::fcom(int i) {
3742   emit_farith(0xD8, 0xD0, i);
3743 }
3744 
3745 void Assembler::fcomp(int i) {
3746   emit_farith(0xD8, 0xD8, i);
3747 }
3748 
3749 void Assembler::fcomp_d(Address src) {
3750   InstructionMark im(this);
3751   emit_int8((unsigned char)0xDC);
3752   emit_operand32(rbx, src);
3753 }
3754 
3755 void Assembler::fcomp_s(Address src) {
3756   InstructionMark im(this);
3757   emit_int8((unsigned char)0xD8);
3758   emit_operand32(rbx, src);
3759 }
3760 
3761 void Assembler::fcompp() {
3762   emit_int8((unsigned char)0xDE);
3763   emit_int8((unsigned char)0xD9);
3764 }
3765 
3766 void Assembler::fcos() {
3767   emit_int8((unsigned char)0xD9);
3768   emit_int8((unsigned char)0xFF);
3769 }
3770 
3771 void Assembler::fdecstp() {
3772   emit_int8((unsigned char)0xD9);
3773   emit_int8((unsigned char)0xF6);
3774 }
3775 
3776 void Assembler::fdiv(int i) {
3777   emit_farith(0xD8, 0xF0, i);
3778 }
3779 
3780 void Assembler::fdiv_d(Address src) {
3781   InstructionMark im(this);
3782   emit_int8((unsigned char)0xDC);
3783   emit_operand32(rsi, src);
3784 }
3785 
3786 void Assembler::fdiv_s(Address src) {
3787   InstructionMark im(this);
3788   emit_int8((unsigned char)0xD8);
3789   emit_operand32(rsi, src);
3790 }
3791 
3792 void Assembler::fdiva(int i) {
3793   emit_farith(0xDC, 0xF8, i);
3794 }
3795 
3796 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
3797 //       is erroneous for some of the floating-point instructions below.
3798 
3799 void Assembler::fdivp(int i) {
3800   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
3801 }
3802 
3803 void Assembler::fdivr(int i) {
3804   emit_farith(0xD8, 0xF8, i);
3805 }
3806 
3807 void Assembler::fdivr_d(Address src) {
3808   InstructionMark im(this);
3809   emit_int8((unsigned char)0xDC);
3810   emit_operand32(rdi, src);
3811 }
3812 
3813 void Assembler::fdivr_s(Address src) {
3814   InstructionMark im(this);
3815   emit_int8((unsigned char)0xD8);
3816   emit_operand32(rdi, src);
3817 }
3818 
3819 void Assembler::fdivra(int i) {
3820   emit_farith(0xDC, 0xF0, i);
3821 }
3822 
3823 void Assembler::fdivrp(int i) {
3824   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
3825 }
3826 
3827 void Assembler::ffree(int i) {
3828   emit_farith(0xDD, 0xC0, i);
3829 }
3830 
3831 void Assembler::fild_d(Address adr) {
3832   InstructionMark im(this);
3833   emit_int8((unsigned char)0xDF);
3834   emit_operand32(rbp, adr);
3835 }
3836 
3837 void Assembler::fild_s(Address adr) {
3838   InstructionMark im(this);
3839   emit_int8((unsigned char)0xDB);
3840   emit_operand32(rax, adr);
3841 }
3842 
3843 void Assembler::fincstp() {
3844   emit_int8((unsigned char)0xD9);
3845   emit_int8((unsigned char)0xF7);
3846 }
3847 
3848 void Assembler::finit() {
3849   emit_int8((unsigned char)0x9B);
3850   emit_int8((unsigned char)0xDB);
3851   emit_int8((unsigned char)0xE3);
3852 }
3853 
3854 void Assembler::fist_s(Address adr) {
3855   InstructionMark im(this);
3856   emit_int8((unsigned char)0xDB);
3857   emit_operand32(rdx, adr);
3858 }
3859 
3860 void Assembler::fistp_d(Address adr) {
3861   InstructionMark im(this);
3862   emit_int8((unsigned char)0xDF);
3863   emit_operand32(rdi, adr);
3864 }
3865 
3866 void Assembler::fistp_s(Address adr) {
3867   InstructionMark im(this);
3868   emit_int8((unsigned char)0xDB);
3869   emit_operand32(rbx, adr);
3870 }
3871 
3872 void Assembler::fld1() {
3873   emit_int8((unsigned char)0xD9);
3874   emit_int8((unsigned char)0xE8);
3875 }
3876 
3877 void Assembler::fld_d(Address adr) {
3878   InstructionMark im(this);
3879   emit_int8((unsigned char)0xDD);
3880   emit_operand32(rax, adr);
3881 }
3882 
3883 void Assembler::fld_s(Address adr) {
3884   InstructionMark im(this);
3885   emit_int8((unsigned char)0xD9);
3886   emit_operand32(rax, adr);
3887 }
3888 
3889 
3890 void Assembler::fld_s(int index) {
3891   emit_farith(0xD9, 0xC0, index);
3892 }
3893 
3894 void Assembler::fld_x(Address adr) {
3895   InstructionMark im(this);
3896   emit_int8((unsigned char)0xDB);
3897   emit_operand32(rbp, adr);
3898 }
3899 
3900 void Assembler::fldcw(Address src) {
3901   InstructionMark im(this);
3902   emit_int8((unsigned char)0xD9);
3903   emit_operand32(rbp, src);
3904 }
3905 
3906 void Assembler::fldenv(Address src) {
3907   InstructionMark im(this);
3908   emit_int8((unsigned char)0xD9);
3909   emit_operand32(rsp, src);
3910 }
3911 
3912 void Assembler::fldlg2() {
3913   emit_int8((unsigned char)0xD9);
3914   emit_int8((unsigned char)0xEC);
3915 }
3916 
3917 void Assembler::fldln2() {
3918   emit_int8((unsigned char)0xD9);
3919   emit_int8((unsigned char)0xED);
3920 }
3921 
3922 void Assembler::fldz() {
3923   emit_int8((unsigned char)0xD9);
3924   emit_int8((unsigned char)0xEE);
3925 }
3926 
3927 void Assembler::flog() {
3928   fldln2();
3929   fxch();
3930   fyl2x();
3931 }
3932 
3933 void Assembler::flog10() {
3934   fldlg2();
3935   fxch();
3936   fyl2x();
3937 }
3938 
3939 void Assembler::fmul(int i) {
3940   emit_farith(0xD8, 0xC8, i);
3941 }
3942 
3943 void Assembler::fmul_d(Address src) {
3944   InstructionMark im(this);
3945   emit_int8((unsigned char)0xDC);
3946   emit_operand32(rcx, src);
3947 }
3948 
3949 void Assembler::fmul_s(Address src) {
3950   InstructionMark im(this);
3951   emit_int8((unsigned char)0xD8);
3952   emit_operand32(rcx, src);
3953 }
3954 
3955 void Assembler::fmula(int i) {
3956   emit_farith(0xDC, 0xC8, i);
3957 }
3958 
3959 void Assembler::fmulp(int i) {
3960   emit_farith(0xDE, 0xC8, i);
3961 }
3962 
3963 void Assembler::fnsave(Address dst) {
3964   InstructionMark im(this);
3965   emit_int8((unsigned char)0xDD);
3966   emit_operand32(rsi, dst);
3967 }
3968 
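// Note: despite the fnstcw name, the leading 0x9B wait prefix below makes
// this the checked form (9B D9 /7, i.e. fstcw), which handles any pending
// x87 exceptions before storing the control word.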
3969 void Assembler::fnstcw(Address src) {
3970   InstructionMark im(this);
3971   emit_int8((unsigned char)0x9B);
3972   emit_int8((unsigned char)0xD9);
3973   emit_operand32(rdi, src);
3974 }
3975 
3976 void Assembler::fnstsw_ax() {
3977   emit_int8((unsigned char)0xDF);
3978   emit_int8((unsigned char)0xE0);
3979 }
3980 
3981 void Assembler::fprem() {
3982   emit_int8((unsigned char)0xD9);
3983   emit_int8((unsigned char)0xF8);
3984 }
3985 
3986 void Assembler::fprem1() {
3987   emit_int8((unsigned char)0xD9);
3988   emit_int8((unsigned char)0xF5);
3989 }
3990 
3991 void Assembler::frstor(Address src) {
3992   InstructionMark im(this);
3993   emit_int8((unsigned char)0xDD);
3994   emit_operand32(rsp, src);
3995 }
3996 
3997 void Assembler::fsin() {
3998   emit_int8((unsigned char)0xD9);
3999   emit_int8((unsigned char)0xFE);
4000 }
4001 
4002 void Assembler::fsqrt() {
4003   emit_int8((unsigned char)0xD9);
4004   emit_int8((unsigned char)0xFA);
4005 }
4006 
4007 void Assembler::fst_d(Address adr) {
4008   InstructionMark im(this);
4009   emit_int8((unsigned char)0xDD);
4010   emit_operand32(rdx, adr);
4011 }
4012 
4013 void Assembler::fst_s(Address adr) {
4014   InstructionMark im(this);
4015   emit_int8((unsigned char)0xD9);
4016   emit_operand32(rdx, adr);
4017 }
4018 
4019 void Assembler::fstp_d(Address adr) {
4020   InstructionMark im(this);
4021   emit_int8((unsigned char)0xDD);
4022   emit_operand32(rbx, adr);
4023 }
4024 
4025 void Assembler::fstp_d(int index) {
4026   emit_farith(0xDD, 0xD8, index);
4027 }
4028 
4029 void Assembler::fstp_s(Address adr) {
4030   InstructionMark im(this);
4031   emit_int8((unsigned char)0xD9);
4032   emit_operand32(rbx, adr);
4033 }
4034 
4035 void Assembler::fstp_x(Address adr) {
4036   InstructionMark im(this);
4037   emit_int8((unsigned char)0xDB);
4038   emit_operand32(rdi, adr);
4039 }
4040 
4041 void Assembler::fsub(int i) {
4042   emit_farith(0xD8, 0xE0, i);
4043 }
4044 
4045 void Assembler::fsub_d(Address src) {
4046   InstructionMark im(this);
4047   emit_int8((unsigned char)0xDC);
4048   emit_operand32(rsp, src);
4049 }
4050 
4051 void Assembler::fsub_s(Address src) {
4052   InstructionMark im(this);
4053   emit_int8((unsigned char)0xD8);
4054   emit_operand32(rsp, src);
4055 }
4056 
4057 void Assembler::fsuba(int i) {
4058   emit_farith(0xDC, 0xE8, i);
4059 }
4060 
4061 void Assembler::fsubp(int i) {
4062   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
4063 }
4064 
4065 void Assembler::fsubr(int i) {
4066   emit_farith(0xD8, 0xE8, i);
4067 }
4068 
4069 void Assembler::fsubr_d(Address src) {
4070   InstructionMark im(this);
4071   emit_int8((unsigned char)0xDC);
4072   emit_operand32(rbp, src);
4073 }
4074 
4075 void Assembler::fsubr_s(Address src) {
4076   InstructionMark im(this);
4077   emit_int8((unsigned char)0xD8);
4078   emit_operand32(rbp, src);
4079 }
4080 
4081 void Assembler::fsubra(int i) {
4082   emit_farith(0xDC, 0xE0, i);
4083 }
4084 
4085 void Assembler::fsubrp(int i) {
4086   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
4087 }
4088 
4089 void Assembler::ftan() {
4090   emit_int8((unsigned char)0xD9);
4091   emit_int8((unsigned char)0xF2);
4092   emit_int8((unsigned char)0xDD);
4093   emit_int8((unsigned char)0xD8);
4094 }
4095 
4096 void Assembler::ftst() {
4097   emit_int8((unsigned char)0xD9);
4098   emit_int8((unsigned char)0xE4);
4099 }
4100 
4101 void Assembler::fucomi(int i) {
4102   // make sure the instruction is supported (introduced for P6, together with cmov)
4103   guarantee(VM_Version::supports_cmov(), "illegal instruction");
4104   emit_farith(0xDB, 0xE8, i);
4105 }
4106 
4107 void Assembler::fucomip(int i) {
4108   // make sure the instruction is supported (introduced for P6, together with cmov)
4109   guarantee(VM_Version::supports_cmov(), "illegal instruction");
4110   emit_farith(0xDF, 0xE8, i);
4111 }
4112 
4113 void Assembler::fwait() {
4114   emit_int8((unsigned char)0x9B);
4115 }
4116 
4117 void Assembler::fxch(int i) {
4118   emit_farith(0xD9, 0xC8, i);
4119 }
4120 
4121 void Assembler::fyl2x() {
4122   emit_int8((unsigned char)0xD9);
4123   emit_int8((unsigned char)0xF1);
4124 }
4125 
4126 void Assembler::frndint() {
4127   emit_int8((unsigned char)0xD9);
4128   emit_int8((unsigned char)0xFC);
4129 }
4130 
4131 void Assembler::f2xm1() {
4132   emit_int8((unsigned char)0xD9);
4133   emit_int8((unsigned char)0xF0);
4134 }
4135 
4136 void Assembler::fldl2e() {
4137   emit_int8((unsigned char)0xD9);
4138   emit_int8((unsigned char)0xEA);
4139 }
4140 
4141 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
4142 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
4143 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
4144 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
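
// For example, pre = VEX_SIMD_F2 with opc = VEX_OPCODE_0F selects the legacy
// byte pair F2 0F (as used by the scalar-double instructions), while
// opc = VEX_OPCODE_0F_38 appends the 0x38 second escape byte after 0F.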
4145 
4146 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
4147 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
4148   if (pre > 0) {
4149     emit_int8(simd_pre[pre]);
4150   }
4151   if (rex_w) {
4152     prefixq(adr, xreg);
4153   } else {
4154     prefix(adr, xreg);
4155   }
4156   if (opc > 0) {
4157     emit_int8(0x0F);
4158     int opc2 = simd_opc[opc];
4159     if (opc2 > 0) {
4160       emit_int8(opc2);
4161     }
4162   }
4163 }
4164 
4165 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
4166   if (pre > 0) {
4167     emit_int8(simd_pre[pre]);
4168   }
4169   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
4170                           prefix_and_encode(dst_enc, src_enc);
4171   if (opc > 0) {
4172     emit_int8(0x0F);
4173     int opc2 = simd_opc[opc];
4174     if (opc2 > 0) {
4175       emit_int8(opc2);
4176     }
4177   }
4178   return encode;
4179 }
4180 
4181 
4182 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
4183   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
4184     prefix(VEX_3bytes);
4185 
4186     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
4187     byte1 = (~byte1) & 0xE0;
4188     byte1 |= opc;
4189     emit_int8(byte1);
4190 
4191     int byte2 = ((~nds_enc) & 0xf) << 3;
4192     byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
4193     emit_int8(byte2);
4194   } else {
4195     prefix(VEX_2bytes);
4196 
4197     int byte1 = vex_r ? VEX_R : 0;
4198     byte1 = (~byte1) & 0x80;
4199     byte1 |= ((~nds_enc) & 0xf) << 3;
4200     byte1 |= (vector256 ? 4 : 0) | pre;
4201     emit_int8(byte1);
4202   }
4203 }
4204 
void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
4206   bool vex_r = (xreg_enc >= 8);
4207   bool vex_b = adr.base_needs_rex();
4208   bool vex_x = adr.index_needs_rex();
4209   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
4210 }
4211 
4212 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
4213   bool vex_r = (dst_enc >= 8);
4214   bool vex_b = (src_enc >= 8);
4215   bool vex_x = false;
4216   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
4217   return (((dst_enc & 7) << 3) | (src_enc & 7));
4218 }
4219 
4220 
4221 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
4222   if (UseAVX > 0) {
4223     int xreg_enc = xreg->encoding();
4224     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
4225     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
4226   } else {
4227     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
4228     rex_prefix(adr, xreg, pre, opc, rex_w);
4229   }
4230 }
4231 
4232 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
4233   int dst_enc = dst->encoding();
4234   int src_enc = src->encoding();
4235   if (UseAVX > 0) {
4236     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4237     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
4238   } else {
4239     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
4240     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
4241   }
4242 }
4243 
4244 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
4245   InstructionMark im(this);
4246   simd_prefix(dst, dst, src, pre);
4247   emit_int8(opcode);
4248   emit_operand(dst, src);
4249 }
4250 
4251 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4252   int encode = simd_prefix_and_encode(dst, dst, src, pre);
4253   emit_int8(opcode);
4254   emit_int8((unsigned char)(0xC0 | encode));
4255 }
4256 
4257 // Versions with no second source register (non-destructive source).
4258 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
4259   InstructionMark im(this);
4260   simd_prefix(dst, xnoreg, src, pre);
4261   emit_int8(opcode);
4262   emit_operand(dst, src);
4263 }
4264 
4265 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4266   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
4267   emit_int8(opcode);
4268   emit_int8((unsigned char)(0xC0 | encode));
4269 }
4270 
// 3-operand AVX instructions
4272 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4273                                Address src, VexSimdPrefix pre, bool vector256) {
4274   InstructionMark im(this);
4275   vex_prefix(dst, nds, src, pre, vector256);
4276   emit_int8(opcode);
4277   emit_operand(dst, src);
4278 }
4279 
4280 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4281                                XMMRegister src, VexSimdPrefix pre, bool vector256) {
4282   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
4283   emit_int8(opcode);
4284   emit_int8((unsigned char)(0xC0 | encode));
4285 }
4286 
4287 #ifndef _LP64
4288 
4289 void Assembler::incl(Register dst) {
4290   // Don't use it directly. Use MacroAssembler::incrementl() instead.
4291   emit_int8(0x40 | dst->encoding());
4292 }
4293 
4294 void Assembler::lea(Register dst, Address src) {
4295   leal(dst, src);
4296 }
4297 
4298 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
4299   InstructionMark im(this);
4300   emit_int8((unsigned char)0xC7);
4301   emit_operand(rax, dst);
4302   emit_data((int)imm32, rspec, 0);
4303 }
4304 
4305 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
4306   InstructionMark im(this);
4307   int encode = prefix_and_encode(dst->encoding());
4308   emit_int8((unsigned char)(0xB8 | encode));
4309   emit_data((int)imm32, rspec, 0);
4310 }
4311 
4312 void Assembler::popa() { // 32bit
4313   emit_int8(0x61);
4314 }
4315 
4316 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
4317   InstructionMark im(this);
4318   emit_int8(0x68);
4319   emit_data(imm32, rspec, 0);
4320 }
4321 
4322 void Assembler::pusha() { // 32bit
4323   emit_int8(0x60);
4324 }
4325 
4326 void Assembler::set_byte_if_not_zero(Register dst) {
4327   emit_int8(0x0F);
4328   emit_int8((unsigned char)0x95);
4329   emit_int8((unsigned char)(0xE0 | dst->encoding()));
4330 }
4331 
4332 void Assembler::shldl(Register dst, Register src) {
4333   emit_int8(0x0F);
4334   emit_int8((unsigned char)0xA5);
4335   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
4336 }
4337 
4338 void Assembler::shrdl(Register dst, Register src) {
4339   emit_int8(0x0F);
4340   emit_int8((unsigned char)0xAD);
4341   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
4342 }
4343 
4344 #else // LP64
4345 
4346 void Assembler::set_byte_if_not_zero(Register dst) {
4347   int enc = prefix_and_encode(dst->encoding(), true);
4348   emit_int8(0x0F);
4349   emit_int8((unsigned char)0x95);
4350   emit_int8((unsigned char)(0xE0 | enc));
4351 }
4352 
// 64bit only pieces of the assembler.
// This should only be used by 64bit instructions that can use rip-relative
// addressing; it cannot be used by instructions that want an immediate value.
4356 
4357 bool Assembler::reachable(AddressLiteral adr) {
4358   int64_t disp;
  // A reloc of none forces a 64bit literal into the code stream. It is likely
  // a placeholder for something that will be patched later, so we need to be
  // certain it will always be reachable.
4362   if (adr.reloc() == relocInfo::none) {
4363     return false;
4364   }
4365   if (adr.reloc() == relocInfo::internal_word_type) {
4366     // This should be rip relative and easily reachable.
4367     return true;
4368   }
4369   if (adr.reloc() == relocInfo::virtual_call_type ||
4370       adr.reloc() == relocInfo::opt_virtual_call_type ||
4371       adr.reloc() == relocInfo::static_call_type ||
4372       adr.reloc() == relocInfo::static_stub_type ) {
4373     // This should be rip relative within the code cache and easily
4374     // reachable until we get huge code caches. (At which point
4375     // ic code is going to have issues).
4376     return true;
4377   }
4378   if (adr.reloc() != relocInfo::external_word_type &&
4379       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
4380       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
4381       adr.reloc() != relocInfo::runtime_call_type ) {
4382     return false;
4383   }
4384 
4385   // Stress the correction code
4386   if (ForceUnreachable) {
    // Must be a runtime_call reloc; see if it is in the codecache.
4388     // Flipping stuff in the codecache to be unreachable causes issues
4389     // with things like inline caches where the additional instructions
4390     // are not handled.
4391     if (CodeCache::find_blob(adr._target) == NULL) {
4392       return false;
4393     }
4394   }
  // For external_word_type/runtime_call_type, if the target is reachable both
  // from where we are now (possibly a temp buffer) and from anywhere we might
  // end up in the codeCache, then we are always reachable. This would have to
  // become more pessimistic if we ever save/restore shared code.
4400   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
4401   if (!is_simm32(disp)) return false;
4402   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
4403   if (!is_simm32(disp)) return false;
4404 
4405   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
4406 
  // Because rip-relative addressing is disp + address_of_next_instruction, and
  // we don't know the value of address_of_next_instruction, we apply a fudge
  // factor to make sure we will be ok no matter the size of the instruction we
  // get placed into. We don't have to fudge the checks above because they are
  // already worst case.
4411 
  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, 4-byte literal
4413   // + 4 because better safe than sorry.
4414   const int fudge = 12 + 4;
4415   if (disp < 0) {
4416     disp -= fudge;
4417   } else {
4418     disp += fudge;
4419   }
4420   return is_simm32(disp);
4421 }
4422 
4423 // Check if the polling page is not reachable from the code cache using rip-relative
4424 // addressing.
4425 bool Assembler::is_polling_page_far() {
4426   intptr_t addr = (intptr_t)os::get_polling_page();
4427   return ForceUnreachable ||
4428          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
4429          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
4430 }
4431 
4432 void Assembler::emit_data64(jlong data,
4433                             relocInfo::relocType rtype,
4434                             int format) {
4435   if (rtype == relocInfo::none) {
4436     emit_int64(data);
4437   } else {
4438     emit_data64(data, Relocation::spec_simple(rtype), format);
4439   }
4440 }
4441 
4442 void Assembler::emit_data64(jlong data,
4443                             RelocationHolder const& rspec,
4444                             int format) {
4445   assert(imm_operand == 0, "default format must be immediate in this file");
4446   assert(imm_operand == format, "must be immediate");
4447   assert(inst_mark() != NULL, "must be inside InstructionMark");
4448   // Do not use AbstractAssembler::relocate, which is not intended for
4449   // embedded words.  Instead, relocate to the enclosing instruction.
4450   code_section()->relocate(inst_mark(), rspec, format);
4451 #ifdef ASSERT
4452   check_relocation(rspec, format);
4453 #endif
4454   emit_int64(data);
4455 }
4456 
4457 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
4458   if (reg_enc >= 8) {
4459     prefix(REX_B);
4460     reg_enc -= 8;
4461   } else if (byteinst && reg_enc >= 4) {
4462     prefix(REX);
4463   }
4464   return reg_enc;
4465 }
4466 
4467 int Assembler::prefixq_and_encode(int reg_enc) {
4468   if (reg_enc < 8) {
4469     prefix(REX_W);
4470   } else {
4471     prefix(REX_WB);
4472     reg_enc -= 8;
4473   }
4474   return reg_enc;
4475 }
4476 
4477 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
4478   if (dst_enc < 8) {
4479     if (src_enc >= 8) {
4480       prefix(REX_B);
4481       src_enc -= 8;
4482     } else if (byteinst && src_enc >= 4) {
4483       prefix(REX);
4484     }
4485   } else {
4486     if (src_enc < 8) {
4487       prefix(REX_R);
4488     } else {
4489       prefix(REX_RB);
4490       src_enc -= 8;
4491     }
4492     dst_enc -= 8;
4493   }
4494   return dst_enc << 3 | src_enc;
4495 }
4496 
4497 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
4498   if (dst_enc < 8) {
4499     if (src_enc < 8) {
4500       prefix(REX_W);
4501     } else {
4502       prefix(REX_WB);
4503       src_enc -= 8;
4504     }
4505   } else {
4506     if (src_enc < 8) {
4507       prefix(REX_WR);
4508     } else {
4509       prefix(REX_WRB);
4510       src_enc -= 8;
4511     }
4512     dst_enc -= 8;
4513   }
4514   return dst_enc << 3 | src_enc;
4515 }
4516 
4517 void Assembler::prefix(Register reg) {
4518   if (reg->encoding() >= 8) {
4519     prefix(REX_B);
4520   }
4521 }
4522 
4523 void Assembler::prefix(Address adr) {
4524   if (adr.base_needs_rex()) {
4525     if (adr.index_needs_rex()) {
4526       prefix(REX_XB);
4527     } else {
4528       prefix(REX_B);
4529     }
4530   } else {
4531     if (adr.index_needs_rex()) {
4532       prefix(REX_X);
4533     }
4534   }
4535 }
4536 
4537 void Assembler::prefixq(Address adr) {
4538   if (adr.base_needs_rex()) {
4539     if (adr.index_needs_rex()) {
4540       prefix(REX_WXB);
4541     } else {
4542       prefix(REX_WB);
4543     }
4544   } else {
4545     if (adr.index_needs_rex()) {
4546       prefix(REX_WX);
4547     } else {
4548       prefix(REX_W);
4549     }
4550   }
4551 }
4552 
4553 
4554 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
4555   if (reg->encoding() < 8) {
4556     if (adr.base_needs_rex()) {
4557       if (adr.index_needs_rex()) {
4558         prefix(REX_XB);
4559       } else {
4560         prefix(REX_B);
4561       }
4562     } else {
4563       if (adr.index_needs_rex()) {
4564         prefix(REX_X);
4565       } else if (byteinst && reg->encoding() >= 4 ) {
4566         prefix(REX);
4567       }
4568     }
4569   } else {
4570     if (adr.base_needs_rex()) {
4571       if (adr.index_needs_rex()) {
4572         prefix(REX_RXB);
4573       } else {
4574         prefix(REX_RB);
4575       }
4576     } else {
4577       if (adr.index_needs_rex()) {
4578         prefix(REX_RX);
4579       } else {
4580         prefix(REX_R);
4581       }
4582     }
4583   }
4584 }
4585 
4586 void Assembler::prefixq(Address adr, Register src) {
4587   if (src->encoding() < 8) {
4588     if (adr.base_needs_rex()) {
4589       if (adr.index_needs_rex()) {
4590         prefix(REX_WXB);
4591       } else {
4592         prefix(REX_WB);
4593       }
4594     } else {
4595       if (adr.index_needs_rex()) {
4596         prefix(REX_WX);
4597       } else {
4598         prefix(REX_W);
4599       }
4600     }
4601   } else {
4602     if (adr.base_needs_rex()) {
4603       if (adr.index_needs_rex()) {
4604         prefix(REX_WRXB);
4605       } else {
4606         prefix(REX_WRB);
4607       }
4608     } else {
4609       if (adr.index_needs_rex()) {
4610         prefix(REX_WRX);
4611       } else {
4612         prefix(REX_WR);
4613       }
4614     }
4615   }
4616 }
4617 
4618 void Assembler::prefix(Address adr, XMMRegister reg) {
4619   if (reg->encoding() < 8) {
4620     if (adr.base_needs_rex()) {
4621       if (adr.index_needs_rex()) {
4622         prefix(REX_XB);
4623       } else {
4624         prefix(REX_B);
4625       }
4626     } else {
4627       if (adr.index_needs_rex()) {
4628         prefix(REX_X);
4629       }
4630     }
4631   } else {
4632     if (adr.base_needs_rex()) {
4633       if (adr.index_needs_rex()) {
4634         prefix(REX_RXB);
4635       } else {
4636         prefix(REX_RB);
4637       }
4638     } else {
4639       if (adr.index_needs_rex()) {
4640         prefix(REX_RX);
4641       } else {
4642         prefix(REX_R);
4643       }
4644     }
4645   }
4646 }
4647 
4648 void Assembler::prefixq(Address adr, XMMRegister src) {
4649   if (src->encoding() < 8) {
4650     if (adr.base_needs_rex()) {
4651       if (adr.index_needs_rex()) {
4652         prefix(REX_WXB);
4653       } else {
4654         prefix(REX_WB);
4655       }
4656     } else {
4657       if (adr.index_needs_rex()) {
4658         prefix(REX_WX);
4659       } else {
4660         prefix(REX_W);
4661       }
4662     }
4663   } else {
4664     if (adr.base_needs_rex()) {
4665       if (adr.index_needs_rex()) {
4666         prefix(REX_WRXB);
4667       } else {
4668         prefix(REX_WRB);
4669       }
4670     } else {
4671       if (adr.index_needs_rex()) {
4672         prefix(REX_WRX);
4673       } else {
4674         prefix(REX_WR);
4675       }
4676     }
4677   }
4678 }
4679 
4680 void Assembler::adcq(Register dst, int32_t imm32) {
4681   (void) prefixq_and_encode(dst->encoding());
4682   emit_arith(0x81, 0xD0, dst, imm32);
4683 }
4684 
4685 void Assembler::adcq(Register dst, Address src) {
4686   InstructionMark im(this);
4687   prefixq(src, dst);
4688   emit_int8(0x13);
4689   emit_operand(dst, src);
4690 }
4691 
4692 void Assembler::adcq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4694   emit_arith(0x13, 0xC0, dst, src);
4695 }
4696 
4697 void Assembler::addq(Address dst, int32_t imm32) {
4698   InstructionMark im(this);
4699   prefixq(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
4701 }
4702 
4703 void Assembler::addq(Address dst, Register src) {
4704   InstructionMark im(this);
4705   prefixq(dst, src);
4706   emit_int8(0x01);
4707   emit_operand(src, dst);
4708 }
4709 
4710 void Assembler::addq(Register dst, int32_t imm32) {
4711   (void) prefixq_and_encode(dst->encoding());
4712   emit_arith(0x81, 0xC0, dst, imm32);
4713 }
4714 
4715 void Assembler::addq(Register dst, Address src) {
4716   InstructionMark im(this);
4717   prefixq(src, dst);
4718   emit_int8(0x03);
4719   emit_operand(dst, src);
4720 }
4721 
4722 void Assembler::addq(Register dst, Register src) {
4723   (void) prefixq_and_encode(dst->encoding(), src->encoding());
4724   emit_arith(0x03, 0xC0, dst, src);
4725 }
4726 
4727 void Assembler::andq(Address dst, int32_t imm32) {
4728   InstructionMark im(this);
4729   prefixq(dst);
4730   emit_int8((unsigned char)0x81);
4731   emit_operand(rsp, dst, 4);
4732   emit_long(imm32);
4733 }
4734 
4735 void Assembler::andq(Register dst, int32_t imm32) {
4736   (void) prefixq_and_encode(dst->encoding());
4737   emit_arith(0x81, 0xE0, dst, imm32);
4738 }
4739 
4740 void Assembler::andq(Register dst, Address src) {
4741   InstructionMark im(this);
4742   prefixq(src, dst);
4743   emit_int8(0x23);
4744   emit_operand(dst, src);
4745 }
4746 
4747 void Assembler::andq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
4749   emit_arith(0x23, 0xC0, dst, src);
4750 }
4751 
4752 void Assembler::bsfq(Register dst, Register src) {
4753   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4754   emit_int8(0x0F);
4755   emit_int8((unsigned char)0xBC);
4756   emit_int8((unsigned char)(0xC0 | encode));
4757 }
4758 
4759 void Assembler::bsrq(Register dst, Register src) {
4760   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
4761   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4762   emit_int8(0x0F);
4763   emit_int8((unsigned char)0xBD);
4764   emit_int8((unsigned char)(0xC0 | encode));
4765 }
4766 
4767 void Assembler::bswapq(Register reg) {
4768   int encode = prefixq_and_encode(reg->encoding());
4769   emit_int8(0x0F);
4770   emit_int8((unsigned char)(0xC8 | encode));
4771 }
4772 
4773 void Assembler::cdqq() {
4774   prefix(REX_W);
4775   emit_int8((unsigned char)0x99);
4776 }
4777 
4778 void Assembler::clflush(Address adr) {
4779   prefix(adr);
4780   emit_int8(0x0F);
4781   emit_int8((unsigned char)0xAE);
4782   emit_operand(rdi, adr);
4783 }
4784 
4785 void Assembler::cmovq(Condition cc, Register dst, Register src) {
4786   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4787   emit_int8(0x0F);
4788   emit_int8(0x40 | cc);
4789   emit_int8((unsigned char)(0xC0 | encode));
4790 }
4791 
4792 void Assembler::cmovq(Condition cc, Register dst, Address src) {
4793   InstructionMark im(this);
4794   prefixq(src, dst);
4795   emit_int8(0x0F);
4796   emit_int8(0x40 | cc);
4797   emit_operand(dst, src);
4798 }
4799 
4800 void Assembler::cmpq(Address dst, int32_t imm32) {
4801   InstructionMark im(this);
4802   prefixq(dst);
4803   emit_int8((unsigned char)0x81);
4804   emit_operand(rdi, dst, 4);
4805   emit_long(imm32);
4806 }
4807 
4808 void Assembler::cmpq(Register dst, int32_t imm32) {
4809   (void) prefixq_and_encode(dst->encoding());
4810   emit_arith(0x81, 0xF8, dst, imm32);
4811 }
4812 
void Assembler::cmpq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  // 0x39 is CMP r/m64, r64: src goes in the reg field and dst is the memory
  // operand, matching the declared (dst, src) order. (0x3B would encode the
  // comparison with the operands swapped.)
  emit_int8(0x39);
  emit_operand(src, dst);
}
4819 
4820 void Assembler::cmpq(Register dst, Register src) {
4821   (void) prefixq_and_encode(dst->encoding(), src->encoding());
4822   emit_arith(0x3B, 0xC0, dst, src);
4823 }
4824 
4825 void Assembler::cmpq(Register dst, Address  src) {
4826   InstructionMark im(this);
4827   prefixq(src, dst);
4828   emit_int8(0x3B);
4829   emit_operand(dst, src);
4830 }
4831 
4832 void Assembler::cmpxchgq(Register reg, Address adr) {
4833   InstructionMark im(this);
4834   prefixq(adr, reg);
4835   emit_int8(0x0F);
4836   emit_int8((unsigned char)0xB1);
4837   emit_operand(reg, adr);
4838 }
4839 
4840 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
4841   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4842   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
4843   emit_int8(0x2A);
4844   emit_int8((unsigned char)(0xC0 | encode));
4845 }
4846 
4847 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
4848   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4849   InstructionMark im(this);
4850   simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
4851   emit_int8(0x2A);
4852   emit_operand(dst, src);
4853 }
4854 
4855 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
4856   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4857   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
4858   emit_int8(0x2A);
4859   emit_int8((unsigned char)(0xC0 | encode));
4860 }
4861 
4862 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
4863   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4864   InstructionMark im(this);
4865   simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
4866   emit_int8(0x2A);
4867   emit_operand(dst, src);
4868 }
4869 
4870 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
4871   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4872   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
4873   emit_int8(0x2C);
4874   emit_int8((unsigned char)(0xC0 | encode));
4875 }
4876 
4877 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
4878   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4879   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
4880   emit_int8(0x2C);
4881   emit_int8((unsigned char)(0xC0 | encode));
4882 }
4883 
4884 void Assembler::decl(Register dst) {
4885   // Don't use it directly. Use MacroAssembler::decrementl() instead.
4886   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
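  // (0x48..0x4F, the old one-byte inc/dec opcodes, serve as REX prefixes on
  // x86-64; decl(rcx) therefore assembles to FF C9 rather than 49.)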
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
}

void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
}

void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rcx, dst);
}

void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(1), src);
}

void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(0), dst);
}

void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF8 | encode));
}

void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_int8(0x6B);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int8(value & 0xFF);
  } else {
    emit_int8(0x69);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_long(value);
  }
}

void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rax, dst);
}

void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}

void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x8D);
  emit_operand(dst, src);
}

void Assembler::mov64(Register dst, int64_t imm64) {
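  // REX.W B8+rd io ("movabs") is the only x86-64 form that carries a full
  // 64-bit immediate; e.g. mov64(rax, -1) emits 48 B8 FF FF FF FF FF FF FF FF.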
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_int64(imm64);
}

void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_data64(imm64, rspec);
}

void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
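  // Compressed oops are 32 bits wide, so this is a plain movl (B8+rd id)
  // whose immediate is later patched via the narrow_oop relocation.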
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_int8((unsigned char)0x81);
  emit_int8((unsigned char)(0xF8 | encode));
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(src1);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, src1, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

void Assembler::lzcntq(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
  emit_int8(0x6E);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // swap src/dst to get correct prefix
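  // (66 REX.W 0F 7E /r stores from the XMM register named in ModRM.reg, so
  // the XMM operand must occupy the "dst" slot of the prefix helper.)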
  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
  emit_int8(0x7E);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::movq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x8B);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::movq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x8B);
  emit_operand(dst, src);
}

void Assembler::movq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8((unsigned char)0x89);
  emit_operand(src, dst);
}

void Assembler::movsbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_operand(dst, src);
}

void Assembler::movsbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::movslq(Register dst, int32_t imm32) {
  // Broken: 0xC7 needs a ModRM byte, which is never emitted below, so the
  // first immediate byte gets decoded as a ModRM byte. dbx shows
  // movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3) as movl     $0x0000000048000000,(%rbx);
  // as a result we shouldn't use this form until it has been fixed and
  // tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xC7 | encode));
  emit_long(imm32);
}

void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x63);
  emit_operand(dst, src);
}

void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x63);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBF);
  emit_operand(dst, src);
}

void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xBF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB6);
  emit_operand(dst, src);
}

void Assembler::movzbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB6);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB7);
  emit_operand(dst, src);
}

void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB7);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD8 | encode));
}

void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD0 | encode));
}

void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rcx, dst, 4);
  emit_long(imm32);
}

void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);
}

void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0B);
  emit_operand(dst, src);
}

void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}

void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9,  Address(rsp, 6 * wordSize));
  movq(r8,  Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp
  movq(rbx, Address(rsp, 12 * wordSize));
  movq(rdx, Address(rsp, 13 * wordSize));
  movq(rcx, Address(rsp, 14 * wordSize));
  movq(rax, Address(rsp, 15 * wordSize));

  addq(rsp, 16 * wordSize);
}

void Assembler::popcntq(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
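  // The F3 mandatory prefix must be emitted before the REX.W prefix:
  // F3 (REX.W) 0F B8 /r.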
  emit_int8((unsigned char)0xF3);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB8);
  emit_operand(dst, src);
}

void Assembler::popcntq(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB8);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::popq(Address dst) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x8F);
  emit_operand(rax, dst);
}

void Assembler::pusha() { // 64bit
  // We have to store the original rsp: the ABI says the 128 bytes below
  // rsp are local scratch (the red zone).
  movq(Address(rsp, -5 * wordSize), rsp);
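  // -5 * wordSize lands inside the red zone; after the subq below it
  // becomes slot 11 (-5 + 16), exactly the slot popa() skips.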

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}

void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsi, src);
}

void Assembler::rclq(Register dst, int imm8) {
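  // 64-bit shifts/rotates take counts up to 63, while isShiftCount() checks
  // the 32-bit range; hence the halved argument (imm8 >> 1 < 32 iff imm8 < 64).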
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xD0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD0 | encode));
    emit_int8(imm8);
  }
}

void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xF8 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xF8 | encode));
    emit_int8(imm8);
  }
}

void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xF8 | encode));
}

void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}

void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x1B);
  emit_operand(dst, src);
}

void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xE0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xE0 | encode));
    emit_int8(imm8);
  }
}

void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE0 | encode));
}

void Assembler::shrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xC1);
  emit_int8((unsigned char)(0xE8 | encode));
  emit_int8(imm8);
}

void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
}

void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x29);
  emit_operand(src, dst);
}

void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}

// Force generation of a 4-byte immediate value even if it fits into 8 bits
void Assembler::subq_imm32(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}

void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x2B);
  emit_operand(dst, src);
}

void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
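    // rax has a short form: REX.W A9 id. E.g. testq(rax, 1) emits
    // 48 A9 01 00 00 00.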
    prefix(REX_W);
    emit_int8((unsigned char)0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode));
  }
  emit_long(imm32);
}

void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}

void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}

void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x33);
  emit_operand(dst, src);
}

#endif // !LP64