1 /*
   2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "gc_interface/collectedHeap.inline.hpp"
  29 #include "interpreter/interpreter.hpp"
  30 #include "memory/cardTableModRefBS.hpp"
  31 #include "memory/resourceArea.hpp"
  32 #include "prims/methodHandles.hpp"
  33 #include "runtime/biasedLocking.hpp"
  34 #include "runtime/interfaceSupport.hpp"
  35 #include "runtime/objectMonitor.hpp"
  36 #include "runtime/os.hpp"
  37 #include "runtime/sharedRuntime.hpp"
  38 #include "runtime/stubRoutines.hpp"
  39 #ifndef SERIALGC
  40 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  41 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
  42 #include "gc_implementation/g1/heapRegion.hpp"
  43 #endif
  44 
  45 #ifdef PRODUCT
  46 #define BLOCK_COMMENT(str) /* nothing */
  47 #define STOP(error) stop(error)
  48 #else
  49 #define BLOCK_COMMENT(str) block_comment(str)
  50 #define STOP(error) block_comment(error); stop(error)
  51 #endif
  52 
  53 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  54 // Implementation of AddressLiteral
  55 
  56 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  57   _is_lval = false;
  58   _target = target;
  59   switch (rtype) {
  60   case relocInfo::oop_type:
  61   case relocInfo::metadata_type:
  62     // Oops are a special case. Normally they would be their own section
  63     // but in cases like icBuffer they are literals in the code stream that
  64     // we don't have a section for. We use none so that we get a literal address
  65     // which is always patchable.
  66     break;
  67   case relocInfo::external_word_type:
  68     _rspec = external_word_Relocation::spec(target);
  69     break;
  70   case relocInfo::internal_word_type:
  71     _rspec = internal_word_Relocation::spec(target);
  72     break;
  73   case relocInfo::opt_virtual_call_type:
  74     _rspec = opt_virtual_call_Relocation::spec();
  75     break;
  76   case relocInfo::static_call_type:
  77     _rspec = static_call_Relocation::spec();
  78     break;
  79   case relocInfo::runtime_call_type:
  80     _rspec = runtime_call_Relocation::spec();
  81     break;
  82   case relocInfo::poll_type:
  83   case relocInfo::poll_return_type:
  84     _rspec = Relocation::spec_simple(rtype);
  85     break;
  86   case relocInfo::none:
  87     break;
  88   default:
  89     ShouldNotReachHere();
  90     break;
  91   }
  92 }
  93 
  94 // Implementation of Address
  95 
  96 #ifdef _LP64
  97 
  98 Address Address::make_array(ArrayAddress adr) {
  99   // Not implementable on 64bit machines
 100   // Should have been handled higher up the call chain.
 101   ShouldNotReachHere();
 102   return Address();
 103 }
 104 
 105 // exceedingly dangerous constructor
 106 Address::Address(int disp, address loc, relocInfo::relocType rtype) {
 107   _base  = noreg;
 108   _index = noreg;
 109   _scale = no_scale;
 110   _disp  = disp;
 111   switch (rtype) {
 112     case relocInfo::external_word_type:
 113       _rspec = external_word_Relocation::spec(loc);
 114       break;
 115     case relocInfo::internal_word_type:
 116       _rspec = internal_word_Relocation::spec(loc);
 117       break;
 118     case relocInfo::runtime_call_type:
 119       // HMM
 120       _rspec = runtime_call_Relocation::spec();
 121       break;
 122     case relocInfo::poll_type:
 123     case relocInfo::poll_return_type:
 124       _rspec = Relocation::spec_simple(rtype);
 125       break;
 126     case relocInfo::none:
 127       break;
 128     default:
 129       ShouldNotReachHere();
 130   }
 131 }
 132 #else // LP64
 133 
 134 Address Address::make_array(ArrayAddress adr) {
 135   AddressLiteral base = adr.base();
 136   Address index = adr.index();
 137   assert(index._disp == 0, "must not have disp"); // maybe it can?
 138   Address array(index._base, index._index, index._scale, (intptr_t) base.target());
 139   array._rspec = base._rspec;
 140   return array;
 141 }
 142 
 143 // exceedingly dangerous constructor
 144 Address::Address(address loc, RelocationHolder spec) {
 145   _base  = noreg;
 146   _index = noreg;
 147   _scale = no_scale;
 148   _disp  = (intptr_t) loc;
 149   _rspec = spec;
 150 }
 151 
 152 #endif // _LP64
 153 
 154 
 155 
 156 // Convert the raw encoding form into the form expected by the constructor for
 157 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 158 // that to noreg for the Address constructor.
 159 Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
 160   RelocationHolder rspec;
 161   if (disp_reloc != relocInfo::none) {
 162     rspec = Relocation::spec_simple(disp_reloc);
 163   }
 164   bool valid_index = index != rsp->encoding();
 165   if (valid_index) {
 166     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
 167     madr._rspec = rspec;
 168     return madr;
 169   } else {
 170     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
 171     madr._rspec = rspec;
 172     return madr;
 173   }
 174 }
 175 
 176 // Implementation of Assembler
 177 
 178 int AbstractAssembler::code_fill_byte() {
 179   return (u_char)'\xF4'; // hlt
 180 }
 181 
 182 // make this go away someday
 183 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
 184   if (rtype == relocInfo::none)
 185         emit_long(data);
 186   else  emit_data(data, Relocation::spec_simple(rtype), format);
 187 }
 188 
 189 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
 190   assert(imm_operand == 0, "default format must be immediate in this file");
 191   assert(inst_mark() != NULL, "must be inside InstructionMark");
 192   if (rspec.type() !=  relocInfo::none) {
 193     #ifdef ASSERT
 194       check_relocation(rspec, format);
 195     #endif
 196     // Do not use AbstractAssembler::relocate, which is not intended for
 197     // embedded words.  Instead, relocate to the enclosing instruction.
 198 
 199     // hack. call32 is too wide for mask so use disp32
 200     if (format == call32_operand)
 201       code_section()->relocate(inst_mark(), rspec, disp32_operand);
 202     else
 203       code_section()->relocate(inst_mark(), rspec, format);
 204   }
 205   emit_long(data);
 206 }
 207 
 208 static int encode(Register r) {
 209   int enc = r->encoding();
 210   if (enc >= 8) {
 211     enc -= 8;
 212   }
 213   return enc;
 214 }
 215 
 216 static int encode(XMMRegister r) {
 217   int enc = r->encoding();
 218   if (enc >= 8) {
 219     enc -= 8;
 220   }
 221   return enc;
 222 }
 223 
 224 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
 225   assert(dst->has_byte_register(), "must have byte register");
 226   assert(isByte(op1) && isByte(op2), "wrong opcode");
 227   assert(isByte(imm8), "not a byte");
 228   assert((op1 & 0x01) == 0, "should be 8bit operation");
 229   emit_int8(op1);
 230   emit_int8(op2 | encode(dst));
 231   emit_int8(imm8);
 232 }
 233 
 234 
 235 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
 236   assert(isByte(op1) && isByte(op2), "wrong opcode");
 237   assert((op1 & 0x01) == 1, "should be 32bit operation");
 238   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 239   if (is8bit(imm32)) {
 240     emit_int8(op1 | 0x02); // set sign bit
 241     emit_int8(op2 | encode(dst));
 242     emit_int8(imm32 & 0xFF);
 243   } else {
 244     emit_int8(op1);
 245     emit_int8(op2 | encode(dst));
 246     emit_long(imm32);
 247   }
 248 }
 249 
 250 // Force generation of a 4 byte immediate value even if it fits into 8bit
 251 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
 252   assert(isByte(op1) && isByte(op2), "wrong opcode");
 253   assert((op1 & 0x01) == 1, "should be 32bit operation");
 254   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 255   emit_int8(op1);
 256   emit_int8(op2 | encode(dst));
 257   emit_long(imm32);
 258 }
 259 
 260 // immediate-to-memory forms
 261 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
 262   assert((op1 & 0x01) == 1, "should be 32bit operation");
 263   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 264   if (is8bit(imm32)) {
 265     emit_int8(op1 | 0x02); // set sign bit
 266     emit_operand(rm, adr, 1);
 267     emit_int8(imm32 & 0xFF);
 268   } else {
 269     emit_int8(op1);
 270     emit_operand(rm, adr, 4);
 271     emit_long(imm32);
 272   }
 273 }
 274 
 275 
 276 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 277   assert(isByte(op1) && isByte(op2), "wrong opcode");
 278   emit_int8(op1);
 279   emit_int8(op2 | encode(dst) << 3 | encode(src));
 280 }
 281 
 282 
 283 void Assembler::emit_operand(Register reg, Register base, Register index,
 284                              Address::ScaleFactor scale, int disp,
 285                              RelocationHolder const& rspec,
 286                              int rip_relative_correction) {
 287   relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();
 288 
 289   // Encode the registers as needed in the fields they are used in
 290 
 291   int regenc = encode(reg) << 3;
 292   int indexenc = index->is_valid() ? encode(index) << 3 : 0;
 293   int baseenc = base->is_valid() ? encode(base) : 0;
 294 
 295   if (base->is_valid()) {
 296     if (index->is_valid()) {
 297       assert(scale != Address::no_scale, "inconsistent address");
 298       // [base + index*scale + disp]
 299       if (disp == 0 && rtype == relocInfo::none  &&
 300           base != rbp LP64_ONLY(&& base != r13)) {
 301         // [base + index*scale]
 302         // [00 reg 100][ss index base]
 303         assert(index != rsp, "illegal addressing mode");
 304         emit_int8(0x04 | regenc);
 305         emit_int8(scale << 6 | indexenc | baseenc);
 306       } else if (is8bit(disp) && rtype == relocInfo::none) {
 307         // [base + index*scale + imm8]
 308         // [01 reg 100][ss index base] imm8
 309         assert(index != rsp, "illegal addressing mode");
 310         emit_int8(0x44 | regenc);
 311         emit_int8(scale << 6 | indexenc | baseenc);
 312         emit_int8(disp & 0xFF);
 313       } else {
 314         // [base + index*scale + disp32]
 315         // [10 reg 100][ss index base] disp32
 316         assert(index != rsp, "illegal addressing mode");
 317         emit_int8(0x84 | regenc);
 318         emit_int8(scale << 6 | indexenc | baseenc);
 319         emit_data(disp, rspec, disp32_operand);
 320       }
 321     } else if (base == rsp LP64_ONLY(|| base == r12)) {
 322       // [rsp + disp]
 323       if (disp == 0 && rtype == relocInfo::none) {
 324         // [rsp]
 325         // [00 reg 100][00 100 100]
 326         emit_int8(0x04 | regenc);
 327         emit_int8(0x24);
 328       } else if (is8bit(disp) && rtype == relocInfo::none) {
 329         // [rsp + imm8]
 330         // [01 reg 100][00 100 100] disp8
 331         emit_int8(0x44 | regenc);
 332         emit_int8(0x24);
 333         emit_int8(disp & 0xFF);
 334       } else {
 335         // [rsp + imm32]
 336         // [10 reg 100][00 100 100] disp32
 337         emit_int8(0x84 | regenc);
 338         emit_int8(0x24);
 339         emit_data(disp, rspec, disp32_operand);
 340       }
 341     } else {
 342       // [base + disp]
 343       assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
 344       if (disp == 0 && rtype == relocInfo::none &&
 345           base != rbp LP64_ONLY(&& base != r13)) {
 346         // [base]
 347         // [00 reg base]
 348         emit_int8(0x00 | regenc | baseenc);
 349       } else if (is8bit(disp) && rtype == relocInfo::none) {
 350         // [base + disp8]
 351         // [01 reg base] disp8
 352         emit_int8(0x40 | regenc | baseenc);
 353         emit_int8(disp & 0xFF);
 354       } else {
 355         // [base + disp32]
 356         // [10 reg base] disp32
 357         emit_int8(0x80 | regenc | baseenc);
 358         emit_data(disp, rspec, disp32_operand);
 359       }
 360     }
 361   } else {
 362     if (index->is_valid()) {
 363       assert(scale != Address::no_scale, "inconsistent address");
 364       // [index*scale + disp]
 365       // [00 reg 100][ss index 101] disp32
 366       assert(index != rsp, "illegal addressing mode");
 367       emit_int8(0x04 | regenc);
 368       emit_int8(scale << 6 | indexenc | 0x05);
 369       emit_data(disp, rspec, disp32_operand);
 370     } else if (rtype != relocInfo::none ) {
 371       // [disp] (64bit) RIP-RELATIVE (32bit) abs
 372       // [00 000 101] disp32
 373 
 374       emit_int8(0x05 | regenc);
 375       // Note that the RIP-rel. correction applies to the generated
 376       // disp field, but _not_ to the target address in the rspec.
 377 
 378       // disp was created by converting the target address minus the pc
 379       // at the start of the instruction. That needs more correction here.
 380       // intptr_t disp = target - next_ip;
 381       assert(inst_mark() != NULL, "must be inside InstructionMark");
 382       address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
 383       int64_t adjusted = disp;
 384       // Do rip-rel adjustment for 64bit
 385       LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
 386       assert(is_simm32(adjusted),
 387              "must be 32bit offset (RIP relative address)");
 388       emit_data((int32_t) adjusted, rspec, disp32_operand);
 389 
 390     } else {
 391       // 32bit never did this, did everything as the rip-rel/disp code above
 392       // [disp] ABSOLUTE
 393       // [00 reg 100][00 100 101] disp32
 394       emit_int8(0x04 | regenc);
 395       emit_int8(0x25);
 396       emit_data(disp, rspec, disp32_operand);
 397     }
 398   }
 399 }
 400 
 401 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
 402                              Address::ScaleFactor scale, int disp,
 403                              RelocationHolder const& rspec) {
 404   emit_operand((Register)reg, base, index, scale, disp, rspec);
 405 }
 406 
 407 // Secret local extension to Assembler::WhichOperand:
 408 #define end_pc_operand (_WhichOperand_limit)
 409 
 410 address Assembler::locate_operand(address inst, WhichOperand which) {
 411   // Decode the given instruction, and return the address of
 412   // an embedded 32-bit operand word.
 413 
 414   // If "which" is disp32_operand, selects the displacement portion
 415   // of an effective address specifier.
 416   // If "which" is imm64_operand, selects the trailing immediate constant.
 417   // If "which" is call32_operand, selects the displacement of a call or jump.
 418   // Caller is responsible for ensuring that there is such an operand,
 419   // and that it is 32/64 bits wide.
 420 
 421   // If "which" is end_pc_operand, find the end of the instruction.
 422 
 423   address ip = inst;
 424   bool is_64bit = false;
 425 
 426   debug_only(bool has_disp32 = false);
 427   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
 428 
 429   again_after_prefix:
 430   switch (0xFF & *ip++) {
 431 
 432   // These convenience macros generate groups of "case" labels for the switch.
 433 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
 434 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
 435              case (x)+4: case (x)+5: case (x)+6: case (x)+7
 436 #define REP16(x) REP8((x)+0): \
 437               case REP8((x)+8)
 438 
 439   case CS_segment:
 440   case SS_segment:
 441   case DS_segment:
 442   case ES_segment:
 443   case FS_segment:
 444   case GS_segment:
 445     // Seems dubious
 446     LP64_ONLY(assert(false, "shouldn't have that prefix"));
 447     assert(ip == inst+1, "only one prefix allowed");
 448     goto again_after_prefix;
 449 
 450   case 0x67:
 451   case REX:
 452   case REX_B:
 453   case REX_X:
 454   case REX_XB:
 455   case REX_R:
 456   case REX_RB:
 457   case REX_RX:
 458   case REX_RXB:
 459     NOT_LP64(assert(false, "64bit prefixes"));
 460     goto again_after_prefix;
 461 
 462   case REX_W:
 463   case REX_WB:
 464   case REX_WX:
 465   case REX_WXB:
 466   case REX_WR:
 467   case REX_WRB:
 468   case REX_WRX:
 469   case REX_WRXB:
 470     NOT_LP64(assert(false, "64bit prefixes"));
 471     is_64bit = true;
 472     goto again_after_prefix;
 473 
 474   case 0xFF: // pushq a; decl a; incl a; call a; jmp a
 475   case 0x88: // movb a, r
 476   case 0x89: // movl a, r
 477   case 0x8A: // movb r, a
 478   case 0x8B: // movl r, a
 479   case 0x8F: // popl a
 480     debug_only(has_disp32 = true);
 481     break;
 482 
 483   case 0x68: // pushq #32
 484     if (which == end_pc_operand) {
 485       return ip + 4;
 486     }
 487     assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
 488     return ip;                  // not produced by emit_operand
 489 
 490   case 0x66: // movw ... (size prefix)
 491     again_after_size_prefix2:
 492     switch (0xFF & *ip++) {
 493     case REX:
 494     case REX_B:
 495     case REX_X:
 496     case REX_XB:
 497     case REX_R:
 498     case REX_RB:
 499     case REX_RX:
 500     case REX_RXB:
 501     case REX_W:
 502     case REX_WB:
 503     case REX_WX:
 504     case REX_WXB:
 505     case REX_WR:
 506     case REX_WRB:
 507     case REX_WRX:
 508     case REX_WRXB:
 509       NOT_LP64(assert(false, "64bit prefix found"));
 510       goto again_after_size_prefix2;
 511     case 0x8B: // movw r, a
 512     case 0x89: // movw a, r
 513       debug_only(has_disp32 = true);
 514       break;
 515     case 0xC7: // movw a, #16
 516       debug_only(has_disp32 = true);
 517       tail_size = 2;  // the imm16
 518       break;
 519     case 0x0F: // several SSE/SSE2 variants
 520       ip--;    // reparse the 0x0F
 521       goto again_after_prefix;
 522     default:
 523       ShouldNotReachHere();
 524     }
 525     break;
 526 
 527   case REP8(0xB8): // movl/q r, #32/#64(oop?)
 528     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
 529     // these asserts are somewhat nonsensical
 530 #ifndef _LP64
 531     assert(which == imm_operand || which == disp32_operand,
 532            err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
 533 #else
 534     assert((which == call32_operand || which == imm_operand) && is_64bit ||
 535            which == narrow_oop_operand && !is_64bit,
 536            err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
 537 #endif // _LP64
 538     return ip;
 539 
 540   case 0x69: // imul r, a, #32
 541   case 0xC7: // movl a, #32(oop?)
 542     tail_size = 4;
 543     debug_only(has_disp32 = true); // has both kinds of operands!
 544     break;
 545 
 546   case 0x0F: // movx..., etc.
 547     switch (0xFF & *ip++) {
 548     case 0x3A: // pcmpestri
 549       tail_size = 1;
 550     case 0x38: // ptest, pmovzxbw
 551       ip++; // skip opcode
 552       debug_only(has_disp32 = true); // has both kinds of operands!
 553       break;
 554 
 555     case 0x70: // pshufd r, r/a, #8
 556       debug_only(has_disp32 = true); // has both kinds of operands!
 557     case 0x73: // psrldq r, #8
 558       tail_size = 1;
 559       break;
 560 
 561     case 0x12: // movlps
 562     case 0x28: // movaps
 563     case 0x2E: // ucomiss
 564     case 0x2F: // comiss
 565     case 0x54: // andps
 566     case 0x55: // andnps
 567     case 0x56: // orps
 568     case 0x57: // xorps
 569     case 0x6E: // movd
 570     case 0x7E: // movd
 571     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
 572       debug_only(has_disp32 = true);
 573       break;
 574 
 575     case 0xAD: // shrd r, a, %cl
 576     case 0xAF: // imul r, a
 577     case 0xBE: // movsbl r, a (movsxb)
 578     case 0xBF: // movswl r, a (movsxw)
 579     case 0xB6: // movzbl r, a (movzxb)
 580     case 0xB7: // movzwl r, a (movzxw)
 581     case REP16(0x40): // cmovl cc, r, a
 582     case 0xB0: // cmpxchgb
 583     case 0xB1: // cmpxchg
 584     case 0xC1: // xaddl
 585     case 0xC7: // cmpxchg8
 586     case REP16(0x90): // setcc a
 587       debug_only(has_disp32 = true);
 588       // fall out of the switch to decode the address
 589       break;
 590 
 591     case 0xC4: // pinsrw r, a, #8
 592       debug_only(has_disp32 = true);
 593     case 0xC5: // pextrw r, r, #8
 594       tail_size = 1;  // the imm8
 595       break;
 596 
 597     case 0xAC: // shrd r, a, #8
 598       debug_only(has_disp32 = true);
 599       tail_size = 1;  // the imm8
 600       break;
 601 
 602     case REP16(0x80): // jcc rdisp32
 603       if (which == end_pc_operand)  return ip + 4;
 604       assert(which == call32_operand, "jcc has no disp32 or imm");
 605       return ip;
 606     default:
 607       ShouldNotReachHere();
 608     }
 609     break;
 610 
 611   case 0x81: // addl a, #32; addl r, #32
 612     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 613     // on 32bit in the case of cmpl, the imm might be an oop
 614     tail_size = 4;
 615     debug_only(has_disp32 = true); // has both kinds of operands!
 616     break;
 617 
 618   case 0x83: // addl a, #8; addl r, #8
 619     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 620     debug_only(has_disp32 = true); // has both kinds of operands!
 621     tail_size = 1;
 622     break;
 623 
 624   case 0x9B:
 625     switch (0xFF & *ip++) {
 626     case 0xD9: // fnstcw a
 627       debug_only(has_disp32 = true);
 628       break;
 629     default:
 630       ShouldNotReachHere();
 631     }
 632     break;
 633 
 634   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
 635   case REP4(0x10): // adc...
 636   case REP4(0x20): // and...
 637   case REP4(0x30): // xor...
 638   case REP4(0x08): // or...
 639   case REP4(0x18): // sbb...
 640   case REP4(0x28): // sub...
 641   case 0xF7: // mull a
 642   case 0x8D: // lea r, a
 643   case 0x87: // xchg r, a
 644   case REP4(0x38): // cmp...
 645   case 0x85: // test r, a
 646     debug_only(has_disp32 = true); // has both kinds of operands!
 647     break;
 648 
 649   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
 650   case 0xC6: // movb a, #8
 651   case 0x80: // cmpb a, #8
 652   case 0x6B: // imul r, a, #8
 653     debug_only(has_disp32 = true); // has both kinds of operands!
 654     tail_size = 1; // the imm8
 655     break;
 656 
 657   case 0xC4: // VEX_3bytes
 658   case 0xC5: // VEX_2bytes
 659     assert((UseAVX > 0), "shouldn't have VEX prefix");
 660     assert(ip == inst+1, "no prefixes allowed");
 661     // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
 662     // but they have prefix 0x0F and processed when 0x0F processed above.
 663     //
 664     // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
 665     // instructions (these instructions are not supported in 64-bit mode).
 666     // To distinguish them bits [7:6] are set in the VEX second byte since
 667     // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
 668     // those VEX bits REX and vvvv bits are inverted.
 669     //
 670     // Fortunately C2 doesn't generate these instructions so we don't need
 671     // to check for them in product version.
 672 
 673     // Check second byte
 674     NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
 675 
 676     // First byte
 677     if ((0xFF & *inst) == VEX_3bytes) {
 678       ip++; // third byte
 679       is_64bit = ((VEX_W & *ip) == VEX_W);
 680     }
 681     ip++; // opcode
 682     // To find the end of instruction (which == end_pc_operand).
 683     switch (0xFF & *ip) {
 684     case 0x61: // pcmpestri r, r/a, #8
 685     case 0x70: // pshufd r, r/a, #8
 686     case 0x73: // psrldq r, #8
 687       tail_size = 1;  // the imm8
 688       break;
 689     default:
 690       break;
 691     }
 692     ip++; // skip opcode
 693     debug_only(has_disp32 = true); // has both kinds of operands!
 694     break;
 695 
 696   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 697   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 698   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 699   case 0xDD: // fld_d a; fst_d a; fstp_d a
 700   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 701   case 0xDF: // fild_d a; fistp_d a
 702   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 703   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 704   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 705     debug_only(has_disp32 = true);
 706     break;
 707 
 708   case 0xE8: // call rdisp32
 709   case 0xE9: // jmp  rdisp32
 710     if (which == end_pc_operand)  return ip + 4;
 711     assert(which == call32_operand, "call has no disp32 or imm");
 712     return ip;
 713 
 714   case 0xF0:                    // Lock
 715     assert(os::is_MP(), "only on MP");
 716     goto again_after_prefix;
 717 
 718   case 0xF3:                    // For SSE
 719   case 0xF2:                    // For SSE2
 720     switch (0xFF & *ip++) {
 721     case REX:
 722     case REX_B:
 723     case REX_X:
 724     case REX_XB:
 725     case REX_R:
 726     case REX_RB:
 727     case REX_RX:
 728     case REX_RXB:
 729     case REX_W:
 730     case REX_WB:
 731     case REX_WX:
 732     case REX_WXB:
 733     case REX_WR:
 734     case REX_WRB:
 735     case REX_WRX:
 736     case REX_WRXB:
 737       NOT_LP64(assert(false, "found 64bit prefix"));
 738       ip++;
 739     default:
 740       ip++;
 741     }
 742     debug_only(has_disp32 = true); // has both kinds of operands!
 743     break;
 744 
 745   default:
 746     ShouldNotReachHere();
 747 
 748 #undef REP8
 749 #undef REP16
 750   }
 751 
 752   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
 753 #ifdef _LP64
 754   assert(which != imm_operand, "instruction is not a movq reg, imm64");
 755 #else
 756   // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
 757   assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
 758 #endif // LP64
 759   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
 760 
 761   // parse the output of emit_operand
 762   int op2 = 0xFF & *ip++;
 763   int base = op2 & 0x07;
 764   int op3 = -1;
 765   const int b100 = 4;
 766   const int b101 = 5;
 767   if (base == b100 && (op2 >> 6) != 3) {
 768     op3 = 0xFF & *ip++;
 769     base = op3 & 0x07;   // refetch the base
 770   }
 771   // now ip points at the disp (if any)
 772 
 773   switch (op2 >> 6) {
 774   case 0:
 775     // [00 reg  100][ss index base]
 776     // [00 reg  100][00   100  esp]
 777     // [00 reg base]
 778     // [00 reg  100][ss index  101][disp32]
 779     // [00 reg  101]               [disp32]
 780 
 781     if (base == b101) {
 782       if (which == disp32_operand)
 783         return ip;              // caller wants the disp32
 784       ip += 4;                  // skip the disp32
 785     }
 786     break;
 787 
 788   case 1:
 789     // [01 reg  100][ss index base][disp8]
 790     // [01 reg  100][00   100  esp][disp8]
 791     // [01 reg base]               [disp8]
 792     ip += 1;                    // skip the disp8
 793     break;
 794 
 795   case 2:
 796     // [10 reg  100][ss index base][disp32]
 797     // [10 reg  100][00   100  esp][disp32]
 798     // [10 reg base]               [disp32]
 799     if (which == disp32_operand)
 800       return ip;                // caller wants the disp32
 801     ip += 4;                    // skip the disp32
 802     break;
 803 
 804   case 3:
 805     // [11 reg base]  (not a memory addressing mode)
 806     break;
 807   }
 808 
 809   if (which == end_pc_operand) {
 810     return ip + tail_size;
 811   }
 812 
 813 #ifdef _LP64
 814   assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
 815 #else
 816   assert(which == imm_operand, "instruction has only an imm field");
 817 #endif // LP64
 818   return ip;
 819 }
 820 
 821 address Assembler::locate_next_instruction(address inst) {
 822   // Secretly share code with locate_operand:
 823   return locate_operand(inst, end_pc_operand);
 824 }
 825 
 826 
 827 #ifdef ASSERT
 828 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
 829   address inst = inst_mark();
 830   assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
 831   address opnd;
 832 
 833   Relocation* r = rspec.reloc();
 834   if (r->type() == relocInfo::none) {
 835     return;
 836   } else if (r->is_call() || format == call32_operand) {
 837     // assert(format == imm32_operand, "cannot specify a nonzero format");
 838     opnd = locate_operand(inst, call32_operand);
 839   } else if (r->is_data()) {
 840     assert(format == imm_operand || format == disp32_operand
 841            LP64_ONLY(|| format == narrow_oop_operand), "format ok");
 842     opnd = locate_operand(inst, (WhichOperand)format);
 843   } else {
 844     assert(format == imm_operand, "cannot specify a format");
 845     return;
 846   }
 847   assert(opnd == pc(), "must put operand where relocs can find it");
 848 }
 849 #endif // ASSERT
 850 
 851 void Assembler::emit_operand32(Register reg, Address adr) {
 852   assert(reg->encoding() < 8, "no extended registers");
 853   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
 854   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
 855                adr._rspec);
 856 }
 857 
 858 void Assembler::emit_operand(Register reg, Address adr,
 859                              int rip_relative_correction) {
 860   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
 861                adr._rspec,
 862                rip_relative_correction);
 863 }
 864 
 865 void Assembler::emit_operand(XMMRegister reg, Address adr) {
 866   emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
 867                adr._rspec);
 868 }
 869 
 870 // MMX operations
 871 void Assembler::emit_operand(MMXRegister reg, Address adr) {
 872   assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
 873   emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
 874 }
 875 
// Work around a gcc (3.2.1-7a) bug: same as emit_operand(MMXRegister, Address)
// but with the parameters in (Address, MMXRegister) order, so a caller that
// already holds its arguments in that order does not need them swapped.
// Extended base/index registers are asserted against.
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  // The MMX register is cast to a general Register for encoding.
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
 881 
 882 
 883 void Assembler::emit_farith(int b1, int b2, int i) {
 884   assert(isByte(b1) && isByte(b2), "wrong opcode");
 885   assert(0 <= i &&  i < 8, "illegal stack offset");
 886   emit_int8(b1);
 887   emit_int8(b2 + i);
 888 }
 889 
 890 
 891 // Now the Assembler instructions (identical for 32/64 bits)
 892 
 893 void Assembler::adcl(Address dst, int32_t imm32) {
 894   InstructionMark im(this);
 895   prefix(dst);
 896   emit_arith_operand(0x81, rdx, dst, imm32);
 897 }
 898 
 899 void Assembler::adcl(Address dst, Register src) {
 900   InstructionMark im(this);
 901   prefix(dst, src);
 902   emit_int8(0x11);
 903   emit_operand(src, dst);
 904 }
 905 
 906 void Assembler::adcl(Register dst, int32_t imm32) {
 907   prefix(dst);
 908   emit_arith(0x81, 0xD0, dst, imm32);
 909 }
 910 
 911 void Assembler::adcl(Register dst, Address src) {
 912   InstructionMark im(this);
 913   prefix(src, dst);
 914   emit_int8(0x13);
 915   emit_operand(dst, src);
 916 }
 917 
 918 void Assembler::adcl(Register dst, Register src) {
 919   (void) prefix_and_encode(dst->encoding(), src->encoding());
 920   emit_arith(0x13, 0xC0, dst, src);
 921 }
 922 
 923 void Assembler::addl(Address dst, int32_t imm32) {
 924   InstructionMark im(this);
 925   prefix(dst);
 926   emit_arith_operand(0x81, rax, dst, imm32);
 927 }
 928 
 929 void Assembler::addl(Address dst, Register src) {
 930   InstructionMark im(this);
 931   prefix(dst, src);
 932   emit_int8(0x01);
 933   emit_operand(src, dst);
 934 }
 935 
 936 void Assembler::addl(Register dst, int32_t imm32) {
 937   prefix(dst);
 938   emit_arith(0x81, 0xC0, dst, imm32);
 939 }
 940 
 941 void Assembler::addl(Register dst, Address src) {
 942   InstructionMark im(this);
 943   prefix(src, dst);
 944   emit_int8(0x03);
 945   emit_operand(dst, src);
 946 }
 947 
 948 void Assembler::addl(Register dst, Register src) {
 949   (void) prefix_and_encode(dst->encoding(), src->encoding());
 950   emit_arith(0x03, 0xC0, dst, src);
 951 }
 952 
 953 void Assembler::addr_nop_4() {
 954   assert(UseAddressNop, "no CPU support");
 955   // 4 bytes: NOP DWORD PTR [EAX+0]
 956   emit_int8(0x0F);
 957   emit_int8(0x1F);
 958   emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
 959   emit_int8(0);    // 8-bits offset (1 byte)
 960 }
 961 
 962 void Assembler::addr_nop_5() {
 963   assert(UseAddressNop, "no CPU support");
 964   // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
 965   emit_int8(0x0F);
 966   emit_int8(0x1F);
 967   emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
 968   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
 969   emit_int8(0);    // 8-bits offset (1 byte)
 970 }
 971 
 972 void Assembler::addr_nop_7() {
 973   assert(UseAddressNop, "no CPU support");
 974   // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
 975   emit_int8(0x0F);
 976   emit_int8(0x1F);
 977   emit_int8((unsigned char)0x80);
 978                    // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
 979   emit_long(0);    // 32-bits offset (4 bytes)
 980 }
 981 
 982 void Assembler::addr_nop_8() {
 983   assert(UseAddressNop, "no CPU support");
 984   // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
 985   emit_int8(0x0F);
 986   emit_int8(0x1F);
 987   emit_int8((unsigned char)0x84);
 988                    // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
 989   emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
 990   emit_long(0);    // 32-bits offset (4 bytes)
 991 }
 992 
 993 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
 994   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
 995   emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
 996 }
 997 
 998 void Assembler::addsd(XMMRegister dst, Address src) {
 999   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1000   emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
1001 }
1002 
1003 void Assembler::addss(XMMRegister dst, XMMRegister src) {
1004   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1005   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
1006 }
1007 
1008 void Assembler::addss(XMMRegister dst, Address src) {
1009   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1010   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
1011 }
1012 
1013 void Assembler::aesdec(XMMRegister dst, Address src) {
1014   assert(VM_Version::supports_aes(), "");
1015   InstructionMark im(this);
1016   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1017   emit_int8((unsigned char)0xDE);
1018   emit_operand(dst, src);
1019 }
1020 
1021 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1022   assert(VM_Version::supports_aes(), "");
1023   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1024   emit_int8((unsigned char)0xDE);
1025   emit_int8(0xC0 | encode);
1026 }
1027 
1028 void Assembler::aesdeclast(XMMRegister dst, Address src) {
1029   assert(VM_Version::supports_aes(), "");
1030   InstructionMark im(this);
1031   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1032   emit_int8((unsigned char)0xDF);
1033   emit_operand(dst, src);
1034 }
1035 
1036 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
1037   assert(VM_Version::supports_aes(), "");
1038   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1039   emit_int8((unsigned char)0xDF);
1040   emit_int8((unsigned char)(0xC0 | encode));
1041 }
1042 
1043 void Assembler::aesenc(XMMRegister dst, Address src) {
1044   assert(VM_Version::supports_aes(), "");
1045   InstructionMark im(this);
1046   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1047   emit_int8((unsigned char)0xDC);
1048   emit_operand(dst, src);
1049 }
1050 
1051 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1052   assert(VM_Version::supports_aes(), "");
1053   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1054   emit_int8((unsigned char)0xDC);
1055   emit_int8(0xC0 | encode);
1056 }
1057 
1058 void Assembler::aesenclast(XMMRegister dst, Address src) {
1059   assert(VM_Version::supports_aes(), "");
1060   InstructionMark im(this);
1061   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1062   emit_int8((unsigned char)0xDD);
1063   emit_operand(dst, src);
1064 }
1065 
1066 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
1067   assert(VM_Version::supports_aes(), "");
1068   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
1069   emit_int8((unsigned char)0xDD);
1070   emit_int8((unsigned char)(0xC0 | encode));
1071 }
1072 
1073 
1074 void Assembler::andl(Address dst, int32_t imm32) {
1075   InstructionMark im(this);
1076   prefix(dst);
1077   emit_int8((unsigned char)0x81);
1078   emit_operand(rsp, dst, 4);
1079   emit_long(imm32);
1080 }
1081 
1082 void Assembler::andl(Register dst, int32_t imm32) {
1083   prefix(dst);
1084   emit_arith(0x81, 0xE0, dst, imm32);
1085 }
1086 
1087 void Assembler::andl(Register dst, Address src) {
1088   InstructionMark im(this);
1089   prefix(src, dst);
1090   emit_int8(0x23);
1091   emit_operand(dst, src);
1092 }
1093 
1094 void Assembler::andl(Register dst, Register src) {
1095   (void) prefix_and_encode(dst->encoding(), src->encoding());
1096   emit_arith(0x23, 0xC0, dst, src);
1097 }
1098 
1099 void Assembler::bsfl(Register dst, Register src) {
1100   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1101   emit_int8(0x0F);
1102   emit_int8((unsigned char)0xBC);
1103   emit_int8((unsigned char)(0xC0 | encode));
1104 }
1105 
1106 void Assembler::bsrl(Register dst, Register src) {
1107   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
1108   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1109   emit_int8(0x0F);
1110   emit_int8((unsigned char)0xBD);
1111   emit_int8((unsigned char)(0xC0 | encode));
1112 }
1113 
1114 void Assembler::bswapl(Register reg) { // bswap
1115   int encode = prefix_and_encode(reg->encoding());
1116   emit_int8(0x0F);
1117   emit_int8((unsigned char)(0xC8 | encode));
1118 }
1119 
1120 void Assembler::call(Label& L, relocInfo::relocType rtype) {
1121   // suspect disp32 is always good
1122   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
1123 
1124   if (L.is_bound()) {
1125     const int long_size = 5;
1126     int offs = (int)( target(L) - pc() );
1127     assert(offs <= 0, "assembler error");
1128     InstructionMark im(this);
1129     // 1110 1000 #32-bit disp
1130     emit_int8((unsigned char)0xE8);
1131     emit_data(offs - long_size, rtype, operand);
1132   } else {
1133     InstructionMark im(this);
1134     // 1110 1000 #32-bit disp
1135     L.add_patch_at(code(), locator());
1136 
1137     emit_int8((unsigned char)0xE8);
1138     emit_data(int(0), rtype, operand);
1139   }
1140 }
1141 
1142 void Assembler::call(Register dst) {
1143   int encode = prefix_and_encode(dst->encoding());
1144   emit_int8((unsigned char)0xFF);
1145   emit_int8((unsigned char)(0xD0 | encode));
1146 }
1147 
1148 
1149 void Assembler::call(Address adr) {
1150   InstructionMark im(this);
1151   prefix(adr);
1152   emit_int8((unsigned char)0xFF);
1153   emit_operand(rdx, adr);
1154 }
1155 
1156 void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
1157   assert(entry != NULL, "call most probably wrong");
1158   InstructionMark im(this);
1159   emit_int8((unsigned char)0xE8);
1160   intptr_t disp = entry - (pc() + sizeof(int32_t));
1161   assert(is_simm32(disp), "must be 32bit offset (call2)");
1162   // Technically, should use call32_operand, but this format is
1163   // implied by the fact that we're emitting a call instruction.
1164 
1165   int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
1166   emit_data((int) disp, rspec, operand);
1167 }
1168 
1169 void Assembler::cdql() {
1170   emit_int8((unsigned char)0x99);
1171 }
1172 
1173 void Assembler::cld() {
1174   emit_int8((unsigned char)0xFC);
1175 }
1176 
1177 void Assembler::cmovl(Condition cc, Register dst, Register src) {
1178   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1179   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1180   emit_int8(0x0F);
1181   emit_int8(0x40 | cc);
1182   emit_int8((unsigned char)(0xC0 | encode));
1183 }
1184 
1185 
1186 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1187   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1188   prefix(src, dst);
1189   emit_int8(0x0F);
1190   emit_int8(0x40 | cc);
1191   emit_operand(dst, src);
1192 }
1193 
1194 void Assembler::cmpb(Address dst, int imm8) {
1195   InstructionMark im(this);
1196   prefix(dst);
1197   emit_int8((unsigned char)0x80);
1198   emit_operand(rdi, dst, 1);
1199   emit_int8(imm8);
1200 }
1201 
1202 void Assembler::cmpl(Address dst, int32_t imm32) {
1203   InstructionMark im(this);
1204   prefix(dst);
1205   emit_int8((unsigned char)0x81);
1206   emit_operand(rdi, dst, 4);
1207   emit_long(imm32);
1208 }
1209 
1210 void Assembler::cmpl(Register dst, int32_t imm32) {
1211   prefix(dst);
1212   emit_arith(0x81, 0xF8, dst, imm32);
1213 }
1214 
1215 void Assembler::cmpl(Register dst, Register src) {
1216   (void) prefix_and_encode(dst->encoding(), src->encoding());
1217   emit_arith(0x3B, 0xC0, dst, src);
1218 }
1219 
1220 
1221 void Assembler::cmpl(Register dst, Address  src) {
1222   InstructionMark im(this);
1223   prefix(src, dst);
1224   emit_int8((unsigned char)0x3B);
1225   emit_operand(dst, src);
1226 }
1227 
1228 void Assembler::cmpw(Address dst, int imm16) {
1229   InstructionMark im(this);
1230   assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
1231   emit_int8(0x66);
1232   emit_int8((unsigned char)0x81);
1233   emit_operand(rdi, dst, 2);
1234   emit_int16(imm16);
1235 }
1236 
1237 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
1238 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1239 // The ZF is set if the compared values were equal, and cleared otherwise.
1240 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1241   InstructionMark im(this);
1242   prefix(adr, reg);
1243   emit_int8(0x0F);
1244   emit_int8((unsigned char)0xB1);
1245   emit_operand(reg, adr);
1246 }
1247 
1248 void Assembler::comisd(XMMRegister dst, Address src) {
1249   // NOTE: dbx seems to decode this as comiss even though the
1250   // 0x66 is there. Strangly ucomisd comes out correct
1251   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1252   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1253 }
1254 
1255 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1256   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1257   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
1258 }
1259 
1260 void Assembler::comiss(XMMRegister dst, Address src) {
1261   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1262   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
1263 }
1264 
1265 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1266   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1267   emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
1268 }
1269 
1270 void Assembler::cpuid() {
1271   emit_int8(0x0F);
1272   emit_int8((unsigned char)0xA2);
1273 }
1274 
1275 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1276   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1277   emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
1278 }
1279 
1280 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1281   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1282   emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
1283 }
1284 
1285 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1286   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1287   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1288 }
1289 
1290 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1291   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1292   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
1293 }
1294 
1295 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1296   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1297   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
1298   emit_int8(0x2A);
1299   emit_int8((unsigned char)(0xC0 | encode));
1300 }
1301 
1302 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1303   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1304   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
1305 }
1306 
1307 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1308   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1309   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
1310   emit_int8(0x2A);
1311   emit_int8((unsigned char)(0xC0 | encode));
1312 }
1313 
1314 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1315   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1316   emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
1317 }
1318 
1319 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1320   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1321   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1322 }
1323 
1324 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1325   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1326   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
1327 }
1328 
1329 
1330 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1331   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1332   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
1333   emit_int8(0x2C);
1334   emit_int8((unsigned char)(0xC0 | encode));
1335 }
1336 
1337 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1338   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1339   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
1340   emit_int8(0x2C);
1341   emit_int8((unsigned char)(0xC0 | encode));
1342 }
1343 
1344 void Assembler::decl(Address dst) {
1345   // Don't use it directly. Use MacroAssembler::decrement() instead.
1346   InstructionMark im(this);
1347   prefix(dst);
1348   emit_int8((unsigned char)0xFF);
1349   emit_operand(rcx, dst);
1350 }
1351 
1352 void Assembler::divsd(XMMRegister dst, Address src) {
1353   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1354   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1355 }
1356 
1357 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1358   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1359   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
1360 }
1361 
1362 void Assembler::divss(XMMRegister dst, Address src) {
1363   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1364   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1365 }
1366 
1367 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1368   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1369   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
1370 }
1371 
1372 void Assembler::emms() {
1373   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1374   emit_int8(0x0F);
1375   emit_int8(0x77);
1376 }
1377 
1378 void Assembler::hlt() {
1379   emit_int8((unsigned char)0xF4);
1380 }
1381 
1382 void Assembler::idivl(Register src) {
1383   int encode = prefix_and_encode(src->encoding());
1384   emit_int8((unsigned char)0xF7);
1385   emit_int8((unsigned char)(0xF8 | encode));
1386 }
1387 
1388 void Assembler::divl(Register src) { // Unsigned
1389   int encode = prefix_and_encode(src->encoding());
1390   emit_int8((unsigned char)0xF7);
1391   emit_int8((unsigned char)(0xF0 | encode));
1392 }
1393 
1394 void Assembler::imull(Register dst, Register src) {
1395   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1396   emit_int8(0x0F);
1397   emit_int8((unsigned char)0xAF);
1398   emit_int8((unsigned char)(0xC0 | encode));
1399 }
1400 
1401 
1402 void Assembler::imull(Register dst, Register src, int value) {
1403   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1404   if (is8bit(value)) {
1405     emit_int8(0x6B);
1406     emit_int8((unsigned char)(0xC0 | encode));
1407     emit_int8(value & 0xFF);
1408   } else {
1409     emit_int8(0x69);
1410     emit_int8((unsigned char)(0xC0 | encode));
1411     emit_long(value);
1412   }
1413 }
1414 
1415 void Assembler::incl(Address dst) {
1416   // Don't use it directly. Use MacroAssembler::increment() instead.
1417   InstructionMark im(this);
1418   prefix(dst);
1419   emit_int8((unsigned char)0xFF);
1420   emit_operand(rax, dst);
1421 }
1422 
1423 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1424   InstructionMark im(this);
1425   assert((0 <= cc) && (cc < 16), "illegal cc");
1426   if (L.is_bound()) {
1427     address dst = target(L);
1428     assert(dst != NULL, "jcc most probably wrong");
1429 
1430     const int short_size = 2;
1431     const int long_size = 6;
1432     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
1433     if (maybe_short && is8bit(offs - short_size)) {
1434       // 0111 tttn #8-bit disp
1435       emit_int8(0x70 | cc);
1436       emit_int8((offs - short_size) & 0xFF);
1437     } else {
1438       // 0000 1111 1000 tttn #32-bit disp
1439       assert(is_simm32(offs - long_size),
1440              "must be 32bit offset (call4)");
1441       emit_int8(0x0F);
1442       emit_int8((unsigned char)(0x80 | cc));
1443       emit_long(offs - long_size);
1444     }
1445   } else {
1446     // Note: could eliminate cond. jumps to this jump if condition
1447     //       is the same however, seems to be rather unlikely case.
1448     // Note: use jccb() if label to be bound is very close to get
1449     //       an 8-bit displacement
1450     L.add_patch_at(code(), locator());
1451     emit_int8(0x0F);
1452     emit_int8((unsigned char)(0x80 | cc));
1453     emit_long(0);
1454   }
1455 }
1456 
1457 void Assembler::jccb(Condition cc, Label& L) {
1458   if (L.is_bound()) {
1459     const int short_size = 2;
1460     address entry = target(L);
1461 #ifdef ASSERT
1462     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1463     intptr_t delta = short_branch_delta();
1464     if (delta != 0) {
1465       dist += (dist < 0 ? (-delta) :delta);
1466     }
1467     assert(is8bit(dist), "Dispacement too large for a short jmp");
1468 #endif
1469     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
1470     // 0111 tttn #8-bit disp
1471     emit_int8(0x70 | cc);
1472     emit_int8((offs - short_size) & 0xFF);
1473   } else {
1474     InstructionMark im(this);
1475     L.add_patch_at(code(), locator());
1476     emit_int8(0x70 | cc);
1477     emit_int8(0);
1478   }
1479 }
1480 
1481 void Assembler::jmp(Address adr) {
1482   InstructionMark im(this);
1483   prefix(adr);
1484   emit_int8((unsigned char)0xFF);
1485   emit_operand(rsp, adr);
1486 }
1487 
1488 void Assembler::jmp(Label& L, bool maybe_short) {
1489   if (L.is_bound()) {
1490     address entry = target(L);
1491     assert(entry != NULL, "jmp most probably wrong");
1492     InstructionMark im(this);
1493     const int short_size = 2;
1494     const int long_size = 5;
1495     intptr_t offs = entry - pc();
1496     if (maybe_short && is8bit(offs - short_size)) {
1497       emit_int8((unsigned char)0xEB);
1498       emit_int8((offs - short_size) & 0xFF);
1499     } else {
1500       emit_int8((unsigned char)0xE9);
1501       emit_long(offs - long_size);
1502     }
1503   } else {
1504     // By default, forward jumps are always 32-bit displacements, since
1505     // we can't yet know where the label will be bound.  If you're sure that
1506     // the forward jump will not run beyond 256 bytes, use jmpb to
1507     // force an 8-bit displacement.
1508     InstructionMark im(this);
1509     L.add_patch_at(code(), locator());
1510     emit_int8((unsigned char)0xE9);
1511     emit_long(0);
1512   }
1513 }
1514 
1515 void Assembler::jmp(Register entry) {
1516   int encode = prefix_and_encode(entry->encoding());
1517   emit_int8((unsigned char)0xFF);
1518   emit_int8((unsigned char)(0xE0 | encode));
1519 }
1520 
1521 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
1522   InstructionMark im(this);
1523   emit_int8((unsigned char)0xE9);
1524   assert(dest != NULL, "must have a target");
1525   intptr_t disp = dest - (pc() + sizeof(int32_t));
1526   assert(is_simm32(disp), "must be 32bit offset (jmp)");
1527   emit_data(disp, rspec.reloc(), call32_operand);
1528 }
1529 
1530 void Assembler::jmpb(Label& L) {
1531   if (L.is_bound()) {
1532     const int short_size = 2;
1533     address entry = target(L);
1534     assert(entry != NULL, "jmp most probably wrong");
1535 #ifdef ASSERT
1536     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
1537     intptr_t delta = short_branch_delta();
1538     if (delta != 0) {
1539       dist += (dist < 0 ? (-delta) :delta);
1540     }
1541     assert(is8bit(dist), "Dispacement too large for a short jmp");
1542 #endif
1543     intptr_t offs = entry - pc();
1544     emit_int8((unsigned char)0xEB);
1545     emit_int8((offs - short_size) & 0xFF);
1546   } else {
1547     InstructionMark im(this);
1548     L.add_patch_at(code(), locator());
1549     emit_int8((unsigned char)0xEB);
1550     emit_int8(0);
1551   }
1552 }
1553 
1554 void Assembler::ldmxcsr( Address src) {
1555   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1556   InstructionMark im(this);
1557   prefix(src);
1558   emit_int8(0x0F);
1559   emit_int8((unsigned char)0xAE);
1560   emit_operand(as_Register(2), src);
1561 }
1562 
1563 void Assembler::leal(Register dst, Address src) {
1564   InstructionMark im(this);
1565 #ifdef _LP64
1566   emit_int8(0x67); // addr32
1567   prefix(src, dst);
1568 #endif // LP64
1569   emit_int8((unsigned char)0x8D);
1570   emit_operand(dst, src);
1571 }
1572 
1573 void Assembler::lfence() {
1574   emit_int8(0x0F);
1575   emit_int8((unsigned char)0xAE);
1576   emit_int8((unsigned char)0xE8);
1577 }
1578 
1579 void Assembler::lock() {
1580   emit_int8((unsigned char)0xF0);
1581 }
1582 
1583 void Assembler::lzcntl(Register dst, Register src) {
1584   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
1585   emit_int8((unsigned char)0xF3);
1586   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1587   emit_int8(0x0F);
1588   emit_int8((unsigned char)0xBD);
1589   emit_int8((unsigned char)(0xC0 | encode));
1590 }
1591 
1592 // Emit mfence instruction
1593 void Assembler::mfence() {
1594   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
1595   emit_int8(0x0F);
1596   emit_int8((unsigned char)0xAE);
1597   emit_int8((unsigned char)0xF0);
1598 }
1599 
1600 void Assembler::mov(Register dst, Register src) {
1601   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
1602 }
1603 
1604 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
1605   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1606   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
1607 }
1608 
1609 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
1610   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1611   emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
1612 }
1613 
1614 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
1615   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1616   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
1617   emit_int8(0x16);
1618   emit_int8((unsigned char)(0xC0 | encode));
1619 }
1620 
1621 void Assembler::movb(Register dst, Address src) {
1622   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
1623   InstructionMark im(this);
1624   prefix(src, dst, true);
1625   emit_int8((unsigned char)0x8A);
1626   emit_operand(dst, src);
1627 }
1628 
1629 
1630 void Assembler::movb(Address dst, int imm8) {
1631   InstructionMark im(this);
1632    prefix(dst);
1633   emit_int8((unsigned char)0xC6);
1634   emit_operand(rax, dst, 1);
1635   emit_int8(imm8);
1636 }
1637 
1638 
1639 void Assembler::movb(Address dst, Register src) {
1640   assert(src->has_byte_register(), "must have byte register");
1641   InstructionMark im(this);
1642   prefix(dst, src, true);
1643   emit_int8((unsigned char)0x88);
1644   emit_operand(src, dst);
1645 }
1646 
1647 void Assembler::movdl(XMMRegister dst, Register src) {
1648   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1649   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
1650   emit_int8(0x6E);
1651   emit_int8((unsigned char)(0xC0 | encode));
1652 }
1653 
1654 void Assembler::movdl(Register dst, XMMRegister src) {
1655   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1656   // swap src/dst to get correct prefix
1657   int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
1658   emit_int8(0x7E);
1659   emit_int8((unsigned char)(0xC0 | encode));
1660 }
1661 
1662 void Assembler::movdl(XMMRegister dst, Address src) {
1663   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1664   InstructionMark im(this);
1665   simd_prefix(dst, src, VEX_SIMD_66);
1666   emit_int8(0x6E);
1667   emit_operand(dst, src);
1668 }
1669 
1670 void Assembler::movdl(Address dst, XMMRegister src) {
1671   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1672   InstructionMark im(this);
1673   simd_prefix(dst, src, VEX_SIMD_66);
1674   emit_int8(0x7E);
1675   emit_operand(src, dst);
1676 }
1677 
1678 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
1679   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1680   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
1681 }
1682 
1683 void Assembler::movdqu(XMMRegister dst, Address src) {
1684   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1685   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1686 }
1687 
1688 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
1689   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1690   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
1691 }
1692 
1693 void Assembler::movdqu(Address dst, XMMRegister src) {
1694   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1695   InstructionMark im(this);
1696   simd_prefix(dst, src, VEX_SIMD_F3);
1697   emit_int8(0x7F);
1698   emit_operand(src, dst);
1699 }
1700 
1701 // Move Unaligned 256bit Vector
1702 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
1703   assert(UseAVX, "");
1704   bool vector256 = true;
1705   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
1706   emit_int8(0x6F);
1707   emit_int8((unsigned char)(0xC0 | encode));
1708 }
1709 
1710 void Assembler::vmovdqu(XMMRegister dst, Address src) {
1711   assert(UseAVX, "");
1712   InstructionMark im(this);
1713   bool vector256 = true;
1714   vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
1715   emit_int8(0x6F);
1716   emit_operand(dst, src);
1717 }
1718 
1719 void Assembler::vmovdqu(Address dst, XMMRegister src) {
1720   assert(UseAVX, "");
1721   InstructionMark im(this);
1722   bool vector256 = true;
1723   // swap src<->dst for encoding
1724   assert(src != xnoreg, "sanity");
1725   vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
1726   emit_int8(0x7F);
1727   emit_operand(src, dst);
1728 }
1729 
1730 // Uses zero extension on 64bit
1731 
1732 void Assembler::movl(Register dst, int32_t imm32) {
1733   int encode = prefix_and_encode(dst->encoding());
1734   emit_int8((unsigned char)(0xB8 | encode));
1735   emit_long(imm32);
1736 }
1737 
1738 void Assembler::movl(Register dst, Register src) {
1739   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1740   emit_int8((unsigned char)0x8B);
1741   emit_int8((unsigned char)(0xC0 | encode));
1742 }
1743 
1744 void Assembler::movl(Register dst, Address src) {
1745   InstructionMark im(this);
1746   prefix(src, dst);
1747   emit_int8((unsigned char)0x8B);
1748   emit_operand(dst, src);
1749 }
1750 
1751 void Assembler::movl(Address dst, int32_t imm32) {
1752   InstructionMark im(this);
1753   prefix(dst);
1754   emit_int8((unsigned char)0xC7);
1755   emit_operand(rax, dst, 4);
1756   emit_long(imm32);
1757 }
1758 
1759 void Assembler::movl(Address dst, Register src) {
1760   InstructionMark im(this);
1761   prefix(dst, src);
1762   emit_int8((unsigned char)0x89);
1763   emit_operand(src, dst);
1764 }
1765 
1766 // New cpus require to use movsd and movss to avoid partial register stall
1767 // when loading from memory. But for old Opteron use movlpd instead of movsd.
1768 // The selection is done in MacroAssembler::movdbl() and movflt().
1769 void Assembler::movlpd(XMMRegister dst, Address src) {
1770   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1771   emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
1772 }
1773 
1774 void Assembler::movq( MMXRegister dst, Address src ) {
1775   assert( VM_Version::supports_mmx(), "" );
1776   emit_int8(0x0F);
1777   emit_int8(0x6F);
1778   emit_operand(dst, src);
1779 }
1780 
// MOVQ m64, mm (MMX store): bytes 0F 7F followed by the memory operand.
// Requires MMX support.
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_int8(0x0F);
  emit_int8(0x7F);
  // Workaround for a gcc (3.2.1-7a) bug:
  // with only an emit_operand(MMX, Address) overload available, that gcc
  // version tail-jumps and tries to reverse the parameters, completely
  // obliterating dst in the process. Calling an overload that takes the
  // arguments in (Address, MMXRegister) order means nothing has to be
  // swapped at the tail jump, so the bug is avoided.
  emit_operand(dst, src);
}
1793 
1794 void Assembler::movq(XMMRegister dst, Address src) {
1795   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1796   InstructionMark im(this);
1797   simd_prefix(dst, src, VEX_SIMD_F3);
1798   emit_int8(0x7E);
1799   emit_operand(dst, src);
1800 }
1801 
1802 void Assembler::movq(Address dst, XMMRegister src) {
1803   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1804   InstructionMark im(this);
1805   simd_prefix(dst, src, VEX_SIMD_66);
1806   emit_int8((unsigned char)0xD6);
1807   emit_operand(src, dst);
1808 }
1809 
1810 void Assembler::movsbl(Register dst, Address src) { // movsxb
1811   InstructionMark im(this);
1812   prefix(src, dst);
1813   emit_int8(0x0F);
1814   emit_int8((unsigned char)0xBE);
1815   emit_operand(dst, src);
1816 }
1817 
1818 void Assembler::movsbl(Register dst, Register src) { // movsxb
1819   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1820   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1821   emit_int8(0x0F);
1822   emit_int8((unsigned char)0xBE);
1823   emit_int8((unsigned char)(0xC0 | encode));
1824 }
1825 
1826 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
1827   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1828   emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
1829 }
1830 
1831 void Assembler::movsd(XMMRegister dst, Address src) {
1832   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1833   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
1834 }
1835 
1836 void Assembler::movsd(Address dst, XMMRegister src) {
1837   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1838   InstructionMark im(this);
1839   simd_prefix(dst, src, VEX_SIMD_F2);
1840   emit_int8(0x11);
1841   emit_operand(src, dst);
1842 }
1843 
1844 void Assembler::movss(XMMRegister dst, XMMRegister src) {
1845   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1846   emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
1847 }
1848 
1849 void Assembler::movss(XMMRegister dst, Address src) {
1850   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1851   emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
1852 }
1853 
1854 void Assembler::movss(Address dst, XMMRegister src) {
1855   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1856   InstructionMark im(this);
1857   simd_prefix(dst, src, VEX_SIMD_F3);
1858   emit_int8(0x11);
1859   emit_operand(src, dst);
1860 }
1861 
1862 void Assembler::movswl(Register dst, Address src) { // movsxw
1863   InstructionMark im(this);
1864   prefix(src, dst);
1865   emit_int8(0x0F);
1866   emit_int8((unsigned char)0xBF);
1867   emit_operand(dst, src);
1868 }
1869 
1870 void Assembler::movswl(Register dst, Register src) { // movsxw
1871   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1872   emit_int8(0x0F);
1873   emit_int8((unsigned char)0xBF);
1874   emit_int8((unsigned char)(0xC0 | encode));
1875 }
1876 
1877 void Assembler::movw(Address dst, int imm16) {
1878   InstructionMark im(this);
1879 
1880   emit_int8(0x66); // switch to 16-bit mode
1881   prefix(dst);
1882   emit_int8((unsigned char)0xC7);
1883   emit_operand(rax, dst, 2);
1884   emit_int16(imm16);
1885 }
1886 
1887 void Assembler::movw(Register dst, Address src) {
1888   InstructionMark im(this);
1889   emit_int8(0x66);
1890   prefix(src, dst);
1891   emit_int8((unsigned char)0x8B);
1892   emit_operand(dst, src);
1893 }
1894 
1895 void Assembler::movw(Address dst, Register src) {
1896   InstructionMark im(this);
1897   emit_int8(0x66);
1898   prefix(dst, src);
1899   emit_int8((unsigned char)0x89);
1900   emit_operand(src, dst);
1901 }
1902 
1903 void Assembler::movzbl(Register dst, Address src) { // movzxb
1904   InstructionMark im(this);
1905   prefix(src, dst);
1906   emit_int8(0x0F);
1907   emit_int8((unsigned char)0xB6);
1908   emit_operand(dst, src);
1909 }
1910 
1911 void Assembler::movzbl(Register dst, Register src) { // movzxb
1912   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
1913   int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
1914   emit_int8(0x0F);
1915   emit_int8((unsigned char)0xB6);
1916   emit_int8(0xC0 | encode);
1917 }
1918 
1919 void Assembler::movzwl(Register dst, Address src) { // movzxw
1920   InstructionMark im(this);
1921   prefix(src, dst);
1922   emit_int8(0x0F);
1923   emit_int8((unsigned char)0xB7);
1924   emit_operand(dst, src);
1925 }
1926 
1927 void Assembler::movzwl(Register dst, Register src) { // movzxw
1928   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1929   emit_int8(0x0F);
1930   emit_int8((unsigned char)0xB7);
1931   emit_int8(0xC0 | encode);
1932 }
1933 
1934 void Assembler::mull(Address src) {
1935   InstructionMark im(this);
1936   prefix(src);
1937   emit_int8((unsigned char)0xF7);
1938   emit_operand(rsp, src);
1939 }
1940 
1941 void Assembler::mull(Register src) {
1942   int encode = prefix_and_encode(src->encoding());
1943   emit_int8((unsigned char)0xF7);
1944   emit_int8((unsigned char)(0xE0 | encode));
1945 }
1946 
1947 void Assembler::mulsd(XMMRegister dst, Address src) {
1948   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1949   emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
1950 }
1951 
1952 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
1953   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1954   emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
1955 }
1956 
1957 void Assembler::mulss(XMMRegister dst, Address src) {
1958   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1959   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
1960 }
1961 
1962 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
1963   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1964   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
1965 }
1966 
1967 void Assembler::negl(Register dst) {
1968   int encode = prefix_and_encode(dst->encoding());
1969   emit_int8((unsigned char)0xF7);
1970   emit_int8((unsigned char)(0xD8 | encode));
1971 }
1972 
// Emits no-op padding for a requested size of i bytes.
//
// Three strategies are used, chosen in this order:
//   * ASSERT builds: i single-byte 0x90 nops, then return.
//   * UseAddressNop on Intel or AMD: multi-byte address nops
//     (addr_nop_4/5/7/8), optionally preceded by 0x66 size prefixes,
//     following the per-vendor tables in the comments below.
//   * Otherwise: 0x90 nops preceded by up to three 0x66 size prefixes.
//
// In every path i is decremented alongside the emitted bytes so that it
// tracks what is still to be produced. The switch statements rely heavily
// on deliberate case fall-through to prepend the right number of prefixes.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    // Peel off 15-byte groups (the "15:" pattern above) while possible.
    while(i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      addr_nop_8();
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8(0x66);   // size prefix
      emit_int8((unsigned char)0x90);
                         // nop
    }
    // Remaining 0..14 bytes; cases intentionally fall through to add prefixes.
    switch (i) {
      case 14:
        emit_int8(0x66); // size prefix
        // fall through
      case 13:
        emit_int8(0x66); // size prefix
        // fall through
      case 12:
        addr_nop_8();
        emit_int8(0x66); // size prefix
        emit_int8(0x66); // size prefix
        emit_int8(0x66); // size prefix
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      case 11:
        emit_int8(0x66); // size prefix
        // fall through
      case 10:
        emit_int8(0x66); // size prefix
        // fall through
      case 9:
        emit_int8(0x66); // size prefix
        // fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
        // fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
        // fall through
      case 2:
        emit_int8(0x66); // size prefix
        // fall through
      case 1:
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    // Peel off 11-byte groups (three prefixes + addr_nop_8) until fewer
    // than 22 bytes remain, leaving at most two nops to generate below.
    while(i >= 22) {
      i -= 11;
      emit_int8(0x66); // size prefix
      emit_int8(0x66); // size prefix
      emit_int8(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    // (i is reduced as bytes are emitted; cases intentionally fall through).
    switch (i) {
      case 21:
        i -= 1;
        emit_int8(0x66); // size prefix
        // fall through
      case 20:
      case 19:
        i -= 1;
        emit_int8(0x66); // size prefix
        // fall through
      case 18:
      case 17:
        i -= 1;
        emit_int8(0x66); // size prefix
        // fall through
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_int8(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_int8(0x66); // size prefix
        // fall through
      case 10:
        emit_int8(0x66); // size prefix
        // fall through
      case 9:
        emit_int8(0x66); // size prefix
        // fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_int8(0x66); // size prefix
        // fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_int8(0x66); // size prefix
        // fall through
      case 2:
        emit_int8(0x66); // size prefix
        // fall through
      case 1:
        emit_int8((unsigned char)0x90);
                         // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  // Peel off 4-byte nops until at most 12 bytes remain.
  while(i > 12) {
    i -= 4;
    emit_int8(0x66); // size prefix
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
                     // nop
  }
  // 1 - 12 nops
  // For 9..12 remaining: emit a 3-byte nop (4-byte when i > 9).
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_int8(0x66);
    }
    i -= 3;
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
  }
  // 1 - 8 nops
  // For 5..8 remaining: emit a 3-byte nop (4-byte when i > 6).
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_int8(0x66);
    }
    i -= 3;
    emit_int8(0x66);
    emit_int8(0x66);
    emit_int8((unsigned char)0x90);
  }
  // Final 0..4 bytes; cases intentionally fall through to add prefixes.
  switch (i) {
    case 4:
      emit_int8(0x66);
      // fall through
    case 3:
      emit_int8(0x66);
      // fall through
    case 2:
      emit_int8(0x66);
      // fall through
    case 1:
      emit_int8((unsigned char)0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}
2225 
2226 void Assembler::notl(Register dst) {
2227   int encode = prefix_and_encode(dst->encoding());
2228   emit_int8((unsigned char)0xF7);
2229   emit_int8((unsigned char)(0xD0 | encode));
2230 }
2231 
2232 void Assembler::orl(Address dst, int32_t imm32) {
2233   InstructionMark im(this);
2234   prefix(dst);
2235   emit_arith_operand(0x81, rcx, dst, imm32);
2236 }
2237 
2238 void Assembler::orl(Register dst, int32_t imm32) {
2239   prefix(dst);
2240   emit_arith(0x81, 0xC8, dst, imm32);
2241 }
2242 
2243 void Assembler::orl(Register dst, Address src) {
2244   InstructionMark im(this);
2245   prefix(src, dst);
2246   emit_int8(0x0B);
2247   emit_operand(dst, src);
2248 }
2249 
2250 void Assembler::orl(Register dst, Register src) {
2251   (void) prefix_and_encode(dst->encoding(), src->encoding());
2252   emit_arith(0x0B, 0xC0, dst, src);
2253 }
2254 
2255 void Assembler::packuswb(XMMRegister dst, Address src) {
2256   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2257   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2258   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2259 }
2260 
2261 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
2262   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2263   emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
2264 }
2265 
2266 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2267   assert(VM_Version::supports_sse4_2(), "");
2268   InstructionMark im(this);
2269   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2270   emit_int8(0x61);
2271   emit_operand(dst, src);
2272   emit_int8(imm8);
2273 }
2274 
2275 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2276   assert(VM_Version::supports_sse4_2(), "");
2277   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
2278   emit_int8(0x61);
2279   emit_int8((unsigned char)(0xC0 | encode));
2280   emit_int8(imm8);
2281 }
2282 
2283 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
2284   assert(VM_Version::supports_sse4_1(), "");
2285   InstructionMark im(this);
2286   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2287   emit_int8(0x30);
2288   emit_operand(dst, src);
2289 }
2290 
2291 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
2292   assert(VM_Version::supports_sse4_1(), "");
2293   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2294   emit_int8(0x30);
2295   emit_int8((unsigned char)(0xC0 | encode));
2296 }
2297 
2298 // generic
2299 void Assembler::pop(Register dst) {
2300   int encode = prefix_and_encode(dst->encoding());
2301   emit_int8(0x58 | encode);
2302 }
2303 
2304 void Assembler::popcntl(Register dst, Address src) {
2305   assert(VM_Version::supports_popcnt(), "must support");
2306   InstructionMark im(this);
2307   emit_int8((unsigned char)0xF3);
2308   prefix(src, dst);
2309   emit_int8(0x0F);
2310   emit_int8((unsigned char)0xB8);
2311   emit_operand(dst, src);
2312 }
2313 
2314 void Assembler::popcntl(Register dst, Register src) {
2315   assert(VM_Version::supports_popcnt(), "must support");
2316   emit_int8((unsigned char)0xF3);
2317   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2318   emit_int8(0x0F);
2319   emit_int8((unsigned char)0xB8);
2320   emit_int8((unsigned char)(0xC0 | encode));
2321 }
2322 
2323 void Assembler::popf() {
2324   emit_int8((unsigned char)0x9D);
2325 }
2326 
2327 #ifndef _LP64 // no 32bit push/pop on amd64
2328 void Assembler::popl(Address dst) {
2329   // NOTE: this will adjust stack by 8byte on 64bits
2330   InstructionMark im(this);
2331   prefix(dst);
2332   emit_int8((unsigned char)0x8F);
2333   emit_operand(rax, dst);
2334 }
2335 #endif
2336 
2337 void Assembler::prefetch_prefix(Address src) {
2338   prefix(src);
2339   emit_int8(0x0F);
2340 }
2341 
2342 void Assembler::prefetchnta(Address src) {
2343   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2344   InstructionMark im(this);
2345   prefetch_prefix(src);
2346   emit_int8(0x18);
2347   emit_operand(rax, src); // 0, src
2348 }
2349 
2350 void Assembler::prefetchr(Address src) {
2351   assert(VM_Version::supports_3dnow_prefetch(), "must support");
2352   InstructionMark im(this);
2353   prefetch_prefix(src);
2354   emit_int8(0x0D);
2355   emit_operand(rax, src); // 0, src
2356 }
2357 
2358 void Assembler::prefetcht0(Address src) {
2359   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2360   InstructionMark im(this);
2361   prefetch_prefix(src);
2362   emit_int8(0x18);
2363   emit_operand(rcx, src); // 1, src
2364 }
2365 
2366 void Assembler::prefetcht1(Address src) {
2367   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2368   InstructionMark im(this);
2369   prefetch_prefix(src);
2370   emit_int8(0x18);
2371   emit_operand(rdx, src); // 2, src
2372 }
2373 
2374 void Assembler::prefetcht2(Address src) {
2375   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
2376   InstructionMark im(this);
2377   prefetch_prefix(src);
2378   emit_int8(0x18);
2379   emit_operand(rbx, src); // 3, src
2380 }
2381 
2382 void Assembler::prefetchw(Address src) {
2383   assert(VM_Version::supports_3dnow_prefetch(), "must support");
2384   InstructionMark im(this);
2385   prefetch_prefix(src);
2386   emit_int8(0x0D);
2387   emit_operand(rcx, src); // 1, src
2388 }
2389 
2390 void Assembler::prefix(Prefix p) {
2391   emit_int8(p);
2392 }
2393 
2394 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
2395   assert(VM_Version::supports_ssse3(), "");
2396   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2397   emit_int8(0x00);
2398   emit_int8((unsigned char)(0xC0 | encode));
2399 }
2400 
2401 void Assembler::pshufb(XMMRegister dst, Address src) {
2402   assert(VM_Version::supports_ssse3(), "");
2403   InstructionMark im(this);
2404   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2405   emit_int8(0x00);
2406   emit_operand(dst, src);
2407 }
2408 
2409 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
2410   assert(isByte(mode), "invalid value");
2411   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2412   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
2413   emit_int8(mode & 0xFF);
2414 
2415 }
2416 
2417 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
2418   assert(isByte(mode), "invalid value");
2419   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2420   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2421   InstructionMark im(this);
2422   simd_prefix(dst, src, VEX_SIMD_66);
2423   emit_int8(0x70);
2424   emit_operand(dst, src);
2425   emit_int8(mode & 0xFF);
2426 }
2427 
2428 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
2429   assert(isByte(mode), "invalid value");
2430   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2431   emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
2432   emit_int8(mode & 0xFF);
2433 }
2434 
2435 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
2436   assert(isByte(mode), "invalid value");
2437   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2438   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2439   InstructionMark im(this);
2440   simd_prefix(dst, src, VEX_SIMD_F2);
2441   emit_int8(0x70);
2442   emit_operand(dst, src);
2443   emit_int8(mode & 0xFF);
2444 }
2445 
2446 void Assembler::psrldq(XMMRegister dst, int shift) {
2447   // Shift 128 bit value in xmm register by number of bytes.
2448   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2449   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
2450   emit_int8(0x73);
2451   emit_int8((unsigned char)(0xC0 | encode));
2452   emit_int8(shift);
2453 }
2454 
2455 void Assembler::ptest(XMMRegister dst, Address src) {
2456   assert(VM_Version::supports_sse4_1(), "");
2457   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2458   InstructionMark im(this);
2459   simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2460   emit_int8(0x17);
2461   emit_operand(dst, src);
2462 }
2463 
2464 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
2465   assert(VM_Version::supports_sse4_1(), "");
2466   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
2467   emit_int8(0x17);
2468   emit_int8((unsigned char)(0xC0 | encode));
2469 }
2470 
2471 void Assembler::punpcklbw(XMMRegister dst, Address src) {
2472   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2473   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2474   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2475 }
2476 
2477 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
2478   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2479   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
2480 }
2481 
2482 void Assembler::punpckldq(XMMRegister dst, Address src) {
2483   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2484   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
2485   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2486 }
2487 
2488 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
2489   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2490   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
2491 }
2492 
2493 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
2494   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2495   emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
2496 }
2497 
2498 void Assembler::push(int32_t imm32) {
2499   // in 64bits we push 64bits onto the stack but only
2500   // take a 32bit immediate
2501   emit_int8(0x68);
2502   emit_long(imm32);
2503 }
2504 
2505 void Assembler::push(Register src) {
2506   int encode = prefix_and_encode(src->encoding());
2507 
2508   emit_int8(0x50 | encode);
2509 }
2510 
2511 void Assembler::pushf() {
2512   emit_int8((unsigned char)0x9C);
2513 }
2514 
2515 #ifndef _LP64 // no 32bit push/pop on amd64
2516 void Assembler::pushl(Address src) {
2517   // Note this will push 64bit on 64bit
2518   InstructionMark im(this);
2519   prefix(src);
2520   emit_int8((unsigned char)0xFF);
2521   emit_operand(rsi, src);
2522 }
2523 #endif
2524 
2525 void Assembler::rcll(Register dst, int imm8) {
2526   assert(isShiftCount(imm8), "illegal shift count");
2527   int encode = prefix_and_encode(dst->encoding());
2528   if (imm8 == 1) {
2529     emit_int8((unsigned char)0xD1);
2530     emit_int8((unsigned char)(0xD0 | encode));
2531   } else {
2532     emit_int8((unsigned char)0xC1);
2533     emit_int8((unsigned char)0xD0 | encode);
2534     emit_int8(imm8);
2535   }
2536 }
2537 
2538 // copies data from [esi] to [edi] using rcx pointer sized words
2539 // generic
2540 void Assembler::rep_mov() {
2541   emit_int8((unsigned char)0xF3);
2542   // MOVSQ
2543   LP64_ONLY(prefix(REX_W));
2544   emit_int8((unsigned char)0xA5);
2545 }
2546 
2547 // sets rcx pointer sized words with rax, value at [edi]
2548 // generic
2549 void Assembler::rep_set() { // rep_set
2550   emit_int8((unsigned char)0xF3);
2551   // STOSQ
2552   LP64_ONLY(prefix(REX_W));
2553   emit_int8((unsigned char)0xAB);
2554 }
2555 
2556 // scans rcx pointer sized words at [edi] for occurance of rax,
2557 // generic
2558 void Assembler::repne_scan() { // repne_scan
2559   emit_int8((unsigned char)0xF2);
2560   // SCASQ
2561   LP64_ONLY(prefix(REX_W));
2562   emit_int8((unsigned char)0xAF);
2563 }
2564 
2565 #ifdef _LP64
2566 // scans rcx 4 byte words at [edi] for occurance of rax,
2567 // generic
2568 void Assembler::repne_scanl() { // repne_scan
2569   emit_int8((unsigned char)0xF2);
2570   // SCASL
2571   emit_int8((unsigned char)0xAF);
2572 }
2573 #endif
2574 
2575 void Assembler::ret(int imm16) {
2576   if (imm16 == 0) {
2577     emit_int8((unsigned char)0xC3);
2578   } else {
2579     emit_int8((unsigned char)0xC2);
2580     emit_int16(imm16);
2581   }
2582 }
2583 
2584 void Assembler::sahf() {
2585 #ifdef _LP64
2586   // Not supported in 64bit mode
2587   ShouldNotReachHere();
2588 #endif
2589   emit_int8((unsigned char)0x9E);
2590 }
2591 
2592 void Assembler::sarl(Register dst, int imm8) {
2593   int encode = prefix_and_encode(dst->encoding());
2594   assert(isShiftCount(imm8), "illegal shift count");
2595   if (imm8 == 1) {
2596     emit_int8((unsigned char)0xD1);
2597     emit_int8((unsigned char)(0xF8 | encode));
2598   } else {
2599     emit_int8((unsigned char)0xC1);
2600     emit_int8((unsigned char)(0xF8 | encode));
2601     emit_int8(imm8);
2602   }
2603 }
2604 
2605 void Assembler::sarl(Register dst) {
2606   int encode = prefix_and_encode(dst->encoding());
2607   emit_int8((unsigned char)0xD3);
2608   emit_int8((unsigned char)(0xF8 | encode));
2609 }
2610 
2611 void Assembler::sbbl(Address dst, int32_t imm32) {
2612   InstructionMark im(this);
2613   prefix(dst);
2614   emit_arith_operand(0x81, rbx, dst, imm32);
2615 }
2616 
2617 void Assembler::sbbl(Register dst, int32_t imm32) {
2618   prefix(dst);
2619   emit_arith(0x81, 0xD8, dst, imm32);
2620 }
2621 
2622 
2623 void Assembler::sbbl(Register dst, Address src) {
2624   InstructionMark im(this);
2625   prefix(src, dst);
2626   emit_int8(0x1B);
2627   emit_operand(dst, src);
2628 }
2629 
2630 void Assembler::sbbl(Register dst, Register src) {
2631   (void) prefix_and_encode(dst->encoding(), src->encoding());
2632   emit_arith(0x1B, 0xC0, dst, src);
2633 }
2634 
2635 void Assembler::setb(Condition cc, Register dst) {
2636   assert(0 <= cc && cc < 16, "illegal cc");
2637   int encode = prefix_and_encode(dst->encoding(), true);
2638   emit_int8(0x0F);
2639   emit_int8((unsigned char)0x90 | cc);
2640   emit_int8((unsigned char)(0xC0 | encode));
2641 }
2642 
2643 void Assembler::shll(Register dst, int imm8) {
2644   assert(isShiftCount(imm8), "illegal shift count");
2645   int encode = prefix_and_encode(dst->encoding());
2646   if (imm8 == 1 ) {
2647     emit_int8((unsigned char)0xD1);
2648     emit_int8((unsigned char)(0xE0 | encode));
2649   } else {
2650     emit_int8((unsigned char)0xC1);
2651     emit_int8((unsigned char)(0xE0 | encode));
2652     emit_int8(imm8);
2653   }
2654 }
2655 
2656 void Assembler::shll(Register dst) {
2657   int encode = prefix_and_encode(dst->encoding());
2658   emit_int8((unsigned char)0xD3);
2659   emit_int8((unsigned char)(0xE0 | encode));
2660 }
2661 
2662 void Assembler::shrl(Register dst, int imm8) {
2663   assert(isShiftCount(imm8), "illegal shift count");
2664   int encode = prefix_and_encode(dst->encoding());
2665   emit_int8((unsigned char)0xC1);
2666   emit_int8((unsigned char)(0xE8 | encode));
2667   emit_int8(imm8);
2668 }
2669 
2670 void Assembler::shrl(Register dst) {
2671   int encode = prefix_and_encode(dst->encoding());
2672   emit_int8((unsigned char)0xD3);
2673   emit_int8((unsigned char)(0xE8 | encode));
2674 }
2675 
2676 // copies a single word from [esi] to [edi]
2677 void Assembler::smovl() {
2678   emit_int8((unsigned char)0xA5);
2679 }
2680 
2681 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
2682   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2683   emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
2684 }
2685 
2686 void Assembler::sqrtsd(XMMRegister dst, Address src) {
2687   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2688   emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
2689 }
2690 
2691 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
2692   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2693   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2694 }
2695 
2696 void Assembler::std() {
2697   emit_int8((unsigned char)0xFD);
2698 }
2699 
2700 void Assembler::sqrtss(XMMRegister dst, Address src) {
2701   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2702   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
2703 }
2704 
2705 void Assembler::stmxcsr( Address dst) {
2706   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2707   InstructionMark im(this);
2708   prefix(dst);
2709   emit_int8(0x0F);
2710   emit_int8((unsigned char)0xAE);
2711   emit_operand(as_Register(3), dst);
2712 }
2713 
2714 void Assembler::subl(Address dst, int32_t imm32) {
2715   InstructionMark im(this);
2716   prefix(dst);
2717   emit_arith_operand(0x81, rbp, dst, imm32);
2718 }
2719 
2720 void Assembler::subl(Address dst, Register src) {
2721   InstructionMark im(this);
2722   prefix(dst, src);
2723   emit_int8(0x29);
2724   emit_operand(src, dst);
2725 }
2726 
2727 void Assembler::subl(Register dst, int32_t imm32) {
2728   prefix(dst);
2729   emit_arith(0x81, 0xE8, dst, imm32);
2730 }
2731 
2732 // Force generation of a 4 byte immediate value even if it fits into 8bit
2733 void Assembler::subl_imm32(Register dst, int32_t imm32) {
2734   prefix(dst);
2735   emit_arith_imm32(0x81, 0xE8, dst, imm32);
2736 }
2737 
2738 void Assembler::subl(Register dst, Address src) {
2739   InstructionMark im(this);
2740   prefix(src, dst);
2741   emit_int8(0x2B);
2742   emit_operand(dst, src);
2743 }
2744 
2745 void Assembler::subl(Register dst, Register src) {
2746   (void) prefix_and_encode(dst->encoding(), src->encoding());
2747   emit_arith(0x2B, 0xC0, dst, src);
2748 }
2749 
2750 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
2751   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2752   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
2753 }
2754 
2755 void Assembler::subsd(XMMRegister dst, Address src) {
2756   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2757   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
2758 }
2759 
2760 void Assembler::subss(XMMRegister dst, XMMRegister src) {
2761   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2762   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2763 }
2764 
2765 void Assembler::subss(XMMRegister dst, Address src) {
2766   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2767   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
2768 }
2769 
2770 void Assembler::testb(Register dst, int imm8) {
2771   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2772   (void) prefix_and_encode(dst->encoding(), true);
2773   emit_arith_b(0xF6, 0xC0, dst, imm8);
2774 }
2775 
2776 void Assembler::testl(Register dst, int32_t imm32) {
2777   // not using emit_arith because test
2778   // doesn't support sign-extension of
2779   // 8bit operands
2780   int encode = dst->encoding();
2781   if (encode == 0) {
2782     emit_int8((unsigned char)0xA9);
2783   } else {
2784     encode = prefix_and_encode(encode);
2785     emit_int8((unsigned char)0xF7);
2786     emit_int8((unsigned char)(0xC0 | encode));
2787   }
2788   emit_long(imm32);
2789 }
2790 
2791 void Assembler::testl(Register dst, Register src) {
2792   (void) prefix_and_encode(dst->encoding(), src->encoding());
2793   emit_arith(0x85, 0xC0, dst, src);
2794 }
2795 
2796 void Assembler::testl(Register dst, Address  src) {
2797   InstructionMark im(this);
2798   prefix(src, dst);
2799   emit_int8((unsigned char)0x85);
2800   emit_operand(dst, src);
2801 }
2802 
2803 void Assembler::ucomisd(XMMRegister dst, Address src) {
2804   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2805   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
2806 }
2807 
2808 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
2809   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2810   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
2811 }
2812 
2813 void Assembler::ucomiss(XMMRegister dst, Address src) {
2814   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2815   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
2816 }
2817 
2818 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
2819   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2820   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
2821 }
2822 
2823 
2824 void Assembler::xaddl(Address dst, Register src) {
2825   InstructionMark im(this);
2826   prefix(dst, src);
2827   emit_int8(0x0F);
2828   emit_int8((unsigned char)0xC1);
2829   emit_operand(src, dst);
2830 }
2831 
2832 void Assembler::xchgl(Register dst, Address src) { // xchg
2833   InstructionMark im(this);
2834   prefix(src, dst);
2835   emit_int8((unsigned char)0x87);
2836   emit_operand(dst, src);
2837 }
2838 
2839 void Assembler::xchgl(Register dst, Register src) {
2840   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2841   emit_int8((unsigned char)0x87);
2842   emit_int8((unsigned char)(0xC0 | encode));
2843 }
2844 
2845 void Assembler::xgetbv() {
2846   emit_int8(0x0F);
2847   emit_int8(0x01);
2848   emit_int8((unsigned char)0xD0);
2849 }
2850 
2851 void Assembler::xorl(Register dst, int32_t imm32) {
2852   prefix(dst);
2853   emit_arith(0x81, 0xF0, dst, imm32);
2854 }
2855 
2856 void Assembler::xorl(Register dst, Address src) {
2857   InstructionMark im(this);
2858   prefix(src, dst);
2859   emit_int8(0x33);
2860   emit_operand(dst, src);
2861 }
2862 
2863 void Assembler::xorl(Register dst, Register src) {
2864   (void) prefix_and_encode(dst->encoding(), src->encoding());
2865   emit_arith(0x33, 0xC0, dst, src);
2866 }
2867 
2868 
// AVX 3-operand scalar floating-point arithmetic instructions
2870 
2871 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
2872   assert(VM_Version::supports_avx(), "");
2873   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2874 }
2875 
2876 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2877   assert(VM_Version::supports_avx(), "");
2878   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2879 }
2880 
2881 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
2882   assert(VM_Version::supports_avx(), "");
2883   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2884 }
2885 
2886 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2887   assert(VM_Version::supports_avx(), "");
2888   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2889 }
2890 
2891 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
2892   assert(VM_Version::supports_avx(), "");
2893   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2894 }
2895 
2896 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2897   assert(VM_Version::supports_avx(), "");
2898   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2899 }
2900 
2901 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
2902   assert(VM_Version::supports_avx(), "");
2903   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2904 }
2905 
2906 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2907   assert(VM_Version::supports_avx(), "");
2908   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2909 }
2910 
2911 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
2912   assert(VM_Version::supports_avx(), "");
2913   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2914 }
2915 
2916 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2917   assert(VM_Version::supports_avx(), "");
2918   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2919 }
2920 
2921 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
2922   assert(VM_Version::supports_avx(), "");
2923   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2924 }
2925 
2926 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2927   assert(VM_Version::supports_avx(), "");
2928   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2929 }
2930 
2931 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
2932   assert(VM_Version::supports_avx(), "");
2933   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2934 }
2935 
2936 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2937   assert(VM_Version::supports_avx(), "");
2938   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
2939 }
2940 
2941 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
2942   assert(VM_Version::supports_avx(), "");
2943   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2944 }
2945 
2946 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
2947   assert(VM_Version::supports_avx(), "");
2948   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
2949 }
2950 
2951 //====================VECTOR ARITHMETIC=====================================
2952 
// Floating-point vector arithmetic
2954 
2955 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
2956   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2957   emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
2958 }
2959 
2960 void Assembler::addps(XMMRegister dst, XMMRegister src) {
2961   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2962   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
2963 }
2964 
2965 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2966   assert(VM_Version::supports_avx(), "");
2967   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
2968 }
2969 
2970 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2971   assert(VM_Version::supports_avx(), "");
2972   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
2973 }
2974 
2975 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
2976   assert(VM_Version::supports_avx(), "");
2977   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
2978 }
2979 
2980 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
2981   assert(VM_Version::supports_avx(), "");
2982   emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
2983 }
2984 
2985 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
2986   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2987   emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
2988 }
2989 
2990 void Assembler::subps(XMMRegister dst, XMMRegister src) {
2991   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2992   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
2993 }
2994 
2995 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
2996   assert(VM_Version::supports_avx(), "");
2997   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
2998 }
2999 
3000 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3001   assert(VM_Version::supports_avx(), "");
3002   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
3003 }
3004 
3005 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3006   assert(VM_Version::supports_avx(), "");
3007   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
3008 }
3009 
3010 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3011   assert(VM_Version::supports_avx(), "");
3012   emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
3013 }
3014 
3015 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
3016   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3017   emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
3018 }
3019 
3020 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
3021   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3022   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
3023 }
3024 
3025 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3026   assert(VM_Version::supports_avx(), "");
3027   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
3028 }
3029 
3030 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3031   assert(VM_Version::supports_avx(), "");
3032   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3033 }
3034 
3035 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3036   assert(VM_Version::supports_avx(), "");
3037   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
3038 }
3039 
3040 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3041   assert(VM_Version::supports_avx(), "");
3042   emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
3043 }
3044 
3045 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
3046   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3047   emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
3048 }
3049 
3050 void Assembler::divps(XMMRegister dst, XMMRegister src) {
3051   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3052   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
3053 }
3054 
3055 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3056   assert(VM_Version::supports_avx(), "");
3057   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
3058 }
3059 
3060 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3061   assert(VM_Version::supports_avx(), "");
3062   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3063 }
3064 
3065 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3066   assert(VM_Version::supports_avx(), "");
3067   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
3068 }
3069 
3070 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3071   assert(VM_Version::supports_avx(), "");
3072   emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
3073 }
3074 
3075 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
3076   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3077   emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3078 }
3079 
3080 void Assembler::andps(XMMRegister dst, XMMRegister src) {
3081   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3082   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3083 }
3084 
3085 void Assembler::andps(XMMRegister dst, Address src) {
3086   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3087   emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
3088 }
3089 
3090 void Assembler::andpd(XMMRegister dst, Address src) {
3091   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3092   emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
3093 }
3094 
3095 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3096   assert(VM_Version::supports_avx(), "");
3097   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
3098 }
3099 
3100 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3101   assert(VM_Version::supports_avx(), "");
3102   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
3103 }
3104 
3105 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3106   assert(VM_Version::supports_avx(), "");
3107   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
3108 }
3109 
3110 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3111   assert(VM_Version::supports_avx(), "");
3112   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
3113 }
3114 
3115 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
3116   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3117   emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3118 }
3119 
3120 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
3121   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3122   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3123 }
3124 
3125 void Assembler::xorpd(XMMRegister dst, Address src) {
3126   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3127   emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
3128 }
3129 
3130 void Assembler::xorps(XMMRegister dst, Address src) {
3131   NOT_LP64(assert(VM_Version::supports_sse(), ""));
3132   emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
3133 }
3134 
3135 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3136   assert(VM_Version::supports_avx(), "");
3137   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
3138 }
3139 
3140 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3141   assert(VM_Version::supports_avx(), "");
3142   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
3143 }
3144 
3145 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3146   assert(VM_Version::supports_avx(), "");
3147   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
3148 }
3149 
3150 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3151   assert(VM_Version::supports_avx(), "");
3152   emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
3153 }
3154 
3155 
3156 // Integer vector arithmetic
3157 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
3158   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3159   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
3160 }
3161 
3162 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
3163   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3164   emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
3165 }
3166 
3167 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
3168   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3169   emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
3170 }
3171 
3172 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
3173   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3174   emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
3175 }
3176 
3177 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3178   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3179   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
3180 }
3181 
3182 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3183   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3184   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
3185 }
3186 
3187 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3188   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3189   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
3190 }
3191 
3192 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3193   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3194   emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
3195 }
3196 
3197 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3198   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3199   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
3200 }
3201 
3202 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3203   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3204   emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
3205 }
3206 
3207 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3208   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3209   emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
3210 }
3211 
3212 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3213   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3214   emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
3215 }
3216 
3217 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
3218   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3219   emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
3220 }
3221 
3222 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
3223   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3224   emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
3225 }
3226 
3227 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
3228   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3229   emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
3230 }
3231 
3232 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
3233   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3234   emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
3235 }
3236 
3237 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3238   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3239   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
3240 }
3241 
3242 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3243   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3244   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
3245 }
3246 
3247 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3248   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3249   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
3250 }
3251 
3252 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3253   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3254   emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
3255 }
3256 
3257 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3258   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3259   emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
3260 }
3261 
3262 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3263   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3264   emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
3265 }
3266 
3267 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3268   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3269   emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
3270 }
3271 
3272 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3273   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3274   emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
3275 }
3276 
3277 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
3278   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3279   emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
3280 }
3281 
3282 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
3283   assert(VM_Version::supports_sse4_1(), "");
3284   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
3285   emit_int8(0x40);
3286   emit_int8((unsigned char)(0xC0 | encode));
3287 }
3288 
3289 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3290   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3291   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
3292 }
3293 
3294 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3295   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3296   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
3297   emit_int8(0x40);
3298   emit_int8((unsigned char)(0xC0 | encode));
3299 }
3300 
3301 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3302   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3303   emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
3304 }
3305 
3306 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3307   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3308   InstructionMark im(this);
3309   int dst_enc = dst->encoding();
3310   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
3311   vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
3312   emit_int8(0x40);
3313   emit_operand(dst, src);
3314 }
3315 
3316 // Shift packed integers left by specified number of bits.
3317 void Assembler::psllw(XMMRegister dst, int shift) {
3318   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3319   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3320   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3321   emit_int8(0x71);
3322   emit_int8((unsigned char)(0xC0 | encode));
3323   emit_int8(shift & 0xFF);
3324 }
3325 
3326 void Assembler::pslld(XMMRegister dst, int shift) {
3327   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3328   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3329   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3330   emit_int8(0x72);
3331   emit_int8((unsigned char)(0xC0 | encode));
3332   emit_int8(shift & 0xFF);
3333 }
3334 
3335 void Assembler::psllq(XMMRegister dst, int shift) {
3336   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3337   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3338   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
3339   emit_int8(0x73);
3340   emit_int8((unsigned char)(0xC0 | encode));
3341   emit_int8(shift & 0xFF);
3342 }
3343 
3344 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
3345   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3346   emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
3347 }
3348 
3349 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
3350   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3351   emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
3352 }
3353 
3354 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
3355   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3356   emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
3357 }
3358 
3359 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3360   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3361   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
3362   emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
3363   emit_int8(shift & 0xFF);
3364 }
3365 
3366 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3367   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3368   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
3369   emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
3370   emit_int8(shift & 0xFF);
3371 }
3372 
3373 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3374   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3375   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
3376   emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
3377   emit_int8(shift & 0xFF);
3378 }
3379 
3380 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3381   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3382   emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
3383 }
3384 
3385 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3386   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3387   emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
3388 }
3389 
3390 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3391   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3392   emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
3393 }
3394 
3395 // Shift packed integers logically right by specified number of bits.
3396 void Assembler::psrlw(XMMRegister dst, int shift) {
3397   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3398   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
3399   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3400   emit_int8(0x71);
3401   emit_int8((unsigned char)(0xC0 | encode));
3402   emit_int8(shift & 0xFF);
3403 }
3404 
3405 void Assembler::psrld(XMMRegister dst, int shift) {
3406   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3407   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
3408   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3409   emit_int8(0x72);
3410   emit_int8((unsigned char)(0xC0 | encode));
3411   emit_int8(shift & 0xFF);
3412 }
3413 
3414 void Assembler::psrlq(XMMRegister dst, int shift) {
3415   // Do not confuse it with psrldq SSE2 instruction which
3416   // shifts 128 bit value in xmm register by number of bytes.
3417   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3418   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3419   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
3420   emit_int8(0x73);
3421   emit_int8((unsigned char)(0xC0 | encode));
3422   emit_int8(shift & 0xFF);
3423 }
3424 
3425 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
3426   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3427   emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
3428 }
3429 
3430 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
3431   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3432   emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
3433 }
3434 
3435 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
3436   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3437   emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
3438 }
3439 
3440 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3441   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3442   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3443   emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
3444   emit_int8(shift & 0xFF);
3445 }
3446 
3447 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3448   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3449   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3450   emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
3451   emit_int8(shift & 0xFF);
3452 }
3453 
3454 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3455   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3456   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
3457   emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
3458   emit_int8(shift & 0xFF);
3459 }
3460 
3461 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3462   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3463   emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
3464 }
3465 
3466 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3467   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3468   emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
3469 }
3470 
3471 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3472   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3473   emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
3474 }
3475 
3476 // Shift packed integers arithmetically right by specified number of bits.
3477 void Assembler::psraw(XMMRegister dst, int shift) {
3478   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3479   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3480   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
3481   emit_int8(0x71);
3482   emit_int8((unsigned char)(0xC0 | encode));
3483   emit_int8(shift & 0xFF);
3484 }
3485 
3486 void Assembler::psrad(XMMRegister dst, int shift) {
3487   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3488   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
3489   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
3490   emit_int8(0x72);
3491   emit_int8((unsigned char)(0xC0 | encode));
3492   emit_int8(shift & 0xFF);
3493 }
3494 
3495 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
3496   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3497   emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
3498 }
3499 
3500 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
3501   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3502   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
3503 }
3504 
3505 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3506   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3507   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3508   emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
3509   emit_int8(shift & 0xFF);
3510 }
3511 
3512 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
3513   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3514   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
3515   emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
3516   emit_int8(shift & 0xFF);
3517 }
3518 
3519 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3520   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3521   emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
3522 }
3523 
3524 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
3525   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3526   emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
3527 }
3528 
3529 
3530 // AND packed integers
3531 void Assembler::pand(XMMRegister dst, XMMRegister src) {
3532   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3533   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
3534 }
3535 
3536 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3537   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3538   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
3539 }
3540 
3541 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3542   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3543   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
3544 }
3545 
3546 void Assembler::por(XMMRegister dst, XMMRegister src) {
3547   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3548   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
3549 }
3550 
3551 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3552   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3553   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
3554 }
3555 
3556 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3557   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3558   emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
3559 }
3560 
3561 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
3562   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3563   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
3564 }
3565 
3566 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
3567   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3568   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
3569 }
3570 
3571 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
3572   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
3573   emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
3574 }
3575 
3576 
3577 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3578   assert(VM_Version::supports_avx(), "");
3579   bool vector256 = true;
3580   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3581   emit_int8(0x18);
3582   emit_int8((unsigned char)(0xC0 | encode));
3583   // 0x00 - insert into lower 128 bits
3584   // 0x01 - insert into upper 128 bits
3585   emit_int8(0x01);
3586 }
3587 
3588 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
3589   assert(VM_Version::supports_avx(), "");
3590   InstructionMark im(this);
3591   bool vector256 = true;
3592   assert(dst != xnoreg, "sanity");
3593   int dst_enc = dst->encoding();
3594   // swap src<->dst for encoding
3595   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3596   emit_int8(0x18);
3597   emit_operand(dst, src);
3598   // 0x01 - insert into upper 128 bits
3599   emit_int8(0x01);
3600 }
3601 
3602 void Assembler::vextractf128h(Address dst, XMMRegister src) {
3603   assert(VM_Version::supports_avx(), "");
3604   InstructionMark im(this);
3605   bool vector256 = true;
3606   assert(src != xnoreg, "sanity");
3607   int src_enc = src->encoding();
3608   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3609   emit_int8(0x19);
3610   emit_operand(src, dst);
3611   // 0x01 - extract from upper 128 bits
3612   emit_int8(0x01);
3613 }
3614 
3615 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
3616   assert(VM_Version::supports_avx2(), "");
3617   bool vector256 = true;
3618   int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
3619   emit_int8(0x38);
3620   emit_int8((unsigned char)(0xC0 | encode));
3621   // 0x00 - insert into lower 128 bits
3622   // 0x01 - insert into upper 128 bits
3623   emit_int8(0x01);
3624 }
3625 
3626 void Assembler::vinserti128h(XMMRegister dst, Address src) {
3627   assert(VM_Version::supports_avx2(), "");
3628   InstructionMark im(this);
3629   bool vector256 = true;
3630   assert(dst != xnoreg, "sanity");
3631   int dst_enc = dst->encoding();
3632   // swap src<->dst for encoding
3633   vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3634   emit_int8(0x38);
3635   emit_operand(dst, src);
3636   // 0x01 - insert into upper 128 bits
3637   emit_int8(0x01);
3638 }
3639 
3640 void Assembler::vextracti128h(Address dst, XMMRegister src) {
3641   assert(VM_Version::supports_avx2(), "");
3642   InstructionMark im(this);
3643   bool vector256 = true;
3644   assert(src != xnoreg, "sanity");
3645   int src_enc = src->encoding();
3646   vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
3647   emit_int8(0x39);
3648   emit_operand(src, dst);
3649   // 0x01 - extract from upper 128 bits
3650   emit_int8(0x01);
3651 }
3652 
3653 void Assembler::vzeroupper() {
3654   assert(VM_Version::supports_avx(), "");
3655   (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
3656   emit_int8(0x77);
3657 }
3658 
3659 
3660 #ifndef _LP64
3661 // 32bit only pieces of the assembler
3662 
3663 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
3664   // NO PREFIX AS NEVER 64BIT
3665   InstructionMark im(this);
3666   emit_int8((unsigned char)0x81);
3667   emit_int8((unsigned char)(0xF8 | src1->encoding()));
3668   emit_data(imm32, rspec, 0);
3669 }
3670 
3671 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
3672   // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
3673   InstructionMark im(this);
3674   emit_int8((unsigned char)0x81);
3675   emit_operand(rdi, src1);
3676   emit_data(imm32, rspec, 0);
3677 }
3678 
3679 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
3680 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
3681 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
3682 void Assembler::cmpxchg8(Address adr) {
3683   InstructionMark im(this);
3684   emit_int8(0x0F);
3685   emit_int8((unsigned char)0xC7);
3686   emit_operand(rcx, adr);
3687 }
3688 
3689 void Assembler::decl(Register dst) {
3690   // Don't use it directly. Use MacroAssembler::decrementl() instead.
3691  emit_int8(0x48 | dst->encoding());
3692 }
3693 
3694 #endif // _LP64
3695 
3696 // 64bit typically doesn't use the x87 but needs to for the trig funcs
3697 
3698 void Assembler::fabs() {
3699   emit_int8((unsigned char)0xD9);
3700   emit_int8((unsigned char)0xE1);
3701 }
3702 
3703 void Assembler::fadd(int i) {
3704   emit_farith(0xD8, 0xC0, i);
3705 }
3706 
3707 void Assembler::fadd_d(Address src) {
3708   InstructionMark im(this);
3709   emit_int8((unsigned char)0xDC);
3710   emit_operand32(rax, src);
3711 }
3712 
3713 void Assembler::fadd_s(Address src) {
3714   InstructionMark im(this);
3715   emit_int8((unsigned char)0xD8);
3716   emit_operand32(rax, src);
3717 }
3718 
3719 void Assembler::fadda(int i) {
3720   emit_farith(0xDC, 0xC0, i);
3721 }
3722 
3723 void Assembler::faddp(int i) {
3724   emit_farith(0xDE, 0xC0, i);
3725 }
3726 
3727 void Assembler::fchs() {
3728   emit_int8((unsigned char)0xD9);
3729   emit_int8((unsigned char)0xE0);
3730 }
3731 
3732 void Assembler::fcom(int i) {
3733   emit_farith(0xD8, 0xD0, i);
3734 }
3735 
3736 void Assembler::fcomp(int i) {
3737   emit_farith(0xD8, 0xD8, i);
3738 }
3739 
3740 void Assembler::fcomp_d(Address src) {
3741   InstructionMark im(this);
3742   emit_int8((unsigned char)0xDC);
3743   emit_operand32(rbx, src);
3744 }
3745 
3746 void Assembler::fcomp_s(Address src) {
3747   InstructionMark im(this);
3748   emit_int8((unsigned char)0xD8);
3749   emit_operand32(rbx, src);
3750 }
3751 
3752 void Assembler::fcompp() {
3753   emit_int8((unsigned char)0xDE);
3754   emit_int8((unsigned char)0xD9);
3755 }
3756 
3757 void Assembler::fcos() {
3758   emit_int8((unsigned char)0xD9);
3759   emit_int8((unsigned char)0xFF);
3760 }
3761 
3762 void Assembler::fdecstp() {
3763   emit_int8((unsigned char)0xD9);
3764   emit_int8((unsigned char)0xF6);
3765 }
3766 
3767 void Assembler::fdiv(int i) {
3768   emit_farith(0xD8, 0xF0, i);
3769 }
3770 
3771 void Assembler::fdiv_d(Address src) {
3772   InstructionMark im(this);
3773   emit_int8((unsigned char)0xDC);
3774   emit_operand32(rsi, src);
3775 }
3776 
3777 void Assembler::fdiv_s(Address src) {
3778   InstructionMark im(this);
3779   emit_int8((unsigned char)0xD8);
3780   emit_operand32(rsi, src);
3781 }
3782 
3783 void Assembler::fdiva(int i) {
3784   emit_farith(0xDC, 0xF8, i);
3785 }
3786 
3787 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
3788 //       is erroneous for some of the floating-point instructions below.
3789 
3790 void Assembler::fdivp(int i) {
3791   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
3792 }
3793 
3794 void Assembler::fdivr(int i) {
3795   emit_farith(0xD8, 0xF8, i);
3796 }
3797 
3798 void Assembler::fdivr_d(Address src) {
3799   InstructionMark im(this);
3800   emit_int8((unsigned char)0xDC);
3801   emit_operand32(rdi, src);
3802 }
3803 
3804 void Assembler::fdivr_s(Address src) {
3805   InstructionMark im(this);
3806   emit_int8((unsigned char)0xD8);
3807   emit_operand32(rdi, src);
3808 }
3809 
3810 void Assembler::fdivra(int i) {
3811   emit_farith(0xDC, 0xF0, i);
3812 }
3813 
3814 void Assembler::fdivrp(int i) {
3815   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
3816 }
3817 
3818 void Assembler::ffree(int i) {
3819   emit_farith(0xDD, 0xC0, i);
3820 }
3821 
3822 void Assembler::fild_d(Address adr) {
3823   InstructionMark im(this);
3824   emit_int8((unsigned char)0xDF);
3825   emit_operand32(rbp, adr);
3826 }
3827 
3828 void Assembler::fild_s(Address adr) {
3829   InstructionMark im(this);
3830   emit_int8((unsigned char)0xDB);
3831   emit_operand32(rax, adr);
3832 }
3833 
3834 void Assembler::fincstp() {
3835   emit_int8((unsigned char)0xD9);
3836   emit_int8((unsigned char)0xF7);
3837 }
3838 
3839 void Assembler::finit() {
3840   emit_int8((unsigned char)0x9B);
3841   emit_int8((unsigned char)0xDB);
3842   emit_int8((unsigned char)0xE3);
3843 }
3844 
3845 void Assembler::fist_s(Address adr) {
3846   InstructionMark im(this);
3847   emit_int8((unsigned char)0xDB);
3848   emit_operand32(rdx, adr);
3849 }
3850 
3851 void Assembler::fistp_d(Address adr) {
3852   InstructionMark im(this);
3853   emit_int8((unsigned char)0xDF);
3854   emit_operand32(rdi, adr);
3855 }
3856 
3857 void Assembler::fistp_s(Address adr) {
3858   InstructionMark im(this);
3859   emit_int8((unsigned char)0xDB);
3860   emit_operand32(rbx, adr);
3861 }
3862 
3863 void Assembler::fld1() {
3864   emit_int8((unsigned char)0xD9);
3865   emit_int8((unsigned char)0xE8);
3866 }
3867 
3868 void Assembler::fld_d(Address adr) {
3869   InstructionMark im(this);
3870   emit_int8((unsigned char)0xDD);
3871   emit_operand32(rax, adr);
3872 }
3873 
3874 void Assembler::fld_s(Address adr) {
3875   InstructionMark im(this);
3876   emit_int8((unsigned char)0xD9);
3877   emit_operand32(rax, adr);
3878 }
3879 
3880 
3881 void Assembler::fld_s(int index) {
3882   emit_farith(0xD9, 0xC0, index);
3883 }
3884 
3885 void Assembler::fld_x(Address adr) {
3886   InstructionMark im(this);
3887   emit_int8((unsigned char)0xDB);
3888   emit_operand32(rbp, adr);
3889 }
3890 
3891 void Assembler::fldcw(Address src) {
3892   InstructionMark im(this);
3893   emit_int8((unsigned char)0xD9);
3894   emit_operand32(rbp, src);
3895 }
3896 
3897 void Assembler::fldenv(Address src) {
3898   InstructionMark im(this);
3899   emit_int8((unsigned char)0xD9);
3900   emit_operand32(rsp, src);
3901 }
3902 
3903 void Assembler::fldlg2() {
3904   emit_int8((unsigned char)0xD9);
3905   emit_int8((unsigned char)0xEC);
3906 }
3907 
3908 void Assembler::fldln2() {
3909   emit_int8((unsigned char)0xD9);
3910   emit_int8((unsigned char)0xED);
3911 }
3912 
3913 void Assembler::fldz() {
3914   emit_int8((unsigned char)0xD9);
3915   emit_int8((unsigned char)0xEE);
3916 }
3917 
// Composite helper: emits fldln2, fxch, fyl2x in that order.
// NOTE(review): presumably this yields the natural logarithm of the value on top
// of the x87 stack (ln 2 * log2 x) - confirm against the x87 reference.
void Assembler::flog() {
  fldln2();  // emits D9 ED
  fxch();    // called with its default argument
  fyl2x();   // emits D9 F1
}
3923 
// Composite helper: emits fldlg2, fxch, fyl2x in that order.
// NOTE(review): presumably this yields the base-10 logarithm of the value on top
// of the x87 stack (log10 2 * log2 x) - confirm against the x87 reference.
void Assembler::flog10() {
  fldlg2();  // emits D9 EC
  fxch();    // called with its default argument
  fyl2x();   // emits D9 F1
}
3929 
3930 void Assembler::fmul(int i) {
3931   emit_farith(0xD8, 0xC8, i);
3932 }
3933 
3934 void Assembler::fmul_d(Address src) {
3935   InstructionMark im(this);
3936   emit_int8((unsigned char)0xDC);
3937   emit_operand32(rcx, src);
3938 }
3939 
3940 void Assembler::fmul_s(Address src) {
3941   InstructionMark im(this);
3942   emit_int8((unsigned char)0xD8);
3943   emit_operand32(rcx, src);
3944 }
3945 
3946 void Assembler::fmula(int i) {
3947   emit_farith(0xDC, 0xC8, i);
3948 }
3949 
3950 void Assembler::fmulp(int i) {
3951   emit_farith(0xDE, 0xC8, i);
3952 }
3953 
3954 void Assembler::fnsave(Address dst) {
3955   InstructionMark im(this);
3956   emit_int8((unsigned char)0xDD);
3957   emit_operand32(rsi, dst);
3958 }
3959 
3960 void Assembler::fnstcw(Address src) {
3961   InstructionMark im(this);
3962   emit_int8((unsigned char)0x9B);
3963   emit_int8((unsigned char)0xD9);
3964   emit_operand32(rdi, src);
3965 }
3966 
3967 void Assembler::fnstsw_ax() {
3968   emit_int8((unsigned char)0xDF);
3969   emit_int8((unsigned char)0xE0);
3970 }
3971 
3972 void Assembler::fprem() {
3973   emit_int8((unsigned char)0xD9);
3974   emit_int8((unsigned char)0xF8);
3975 }
3976 
3977 void Assembler::fprem1() {
3978   emit_int8((unsigned char)0xD9);
3979   emit_int8((unsigned char)0xF5);
3980 }
3981 
3982 void Assembler::frstor(Address src) {
3983   InstructionMark im(this);
3984   emit_int8((unsigned char)0xDD);
3985   emit_operand32(rsp, src);
3986 }
3987 
3988 void Assembler::fsin() {
3989   emit_int8((unsigned char)0xD9);
3990   emit_int8((unsigned char)0xFE);
3991 }
3992 
3993 void Assembler::fsqrt() {
3994   emit_int8((unsigned char)0xD9);
3995   emit_int8((unsigned char)0xFA);
3996 }
3997 
3998 void Assembler::fst_d(Address adr) {
3999   InstructionMark im(this);
4000   emit_int8((unsigned char)0xDD);
4001   emit_operand32(rdx, adr);
4002 }
4003 
4004 void Assembler::fst_s(Address adr) {
4005   InstructionMark im(this);
4006   emit_int8((unsigned char)0xD9);
4007   emit_operand32(rdx, adr);
4008 }
4009 
4010 void Assembler::fstp_d(Address adr) {
4011   InstructionMark im(this);
4012   emit_int8((unsigned char)0xDD);
4013   emit_operand32(rbx, adr);
4014 }
4015 
4016 void Assembler::fstp_d(int index) {
4017   emit_farith(0xDD, 0xD8, index);
4018 }
4019 
4020 void Assembler::fstp_s(Address adr) {
4021   InstructionMark im(this);
4022   emit_int8((unsigned char)0xD9);
4023   emit_operand32(rbx, adr);
4024 }
4025 
4026 void Assembler::fstp_x(Address adr) {
4027   InstructionMark im(this);
4028   emit_int8((unsigned char)0xDB);
4029   emit_operand32(rdi, adr);
4030 }
4031 
4032 void Assembler::fsub(int i) {
4033   emit_farith(0xD8, 0xE0, i);
4034 }
4035 
4036 void Assembler::fsub_d(Address src) {
4037   InstructionMark im(this);
4038   emit_int8((unsigned char)0xDC);
4039   emit_operand32(rsp, src);
4040 }
4041 
4042 void Assembler::fsub_s(Address src) {
4043   InstructionMark im(this);
4044   emit_int8((unsigned char)0xD8);
4045   emit_operand32(rsp, src);
4046 }
4047 
4048 void Assembler::fsuba(int i) {
4049   emit_farith(0xDC, 0xE8, i);
4050 }
4051 
4052 void Assembler::fsubp(int i) {
4053   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
4054 }
4055 
4056 void Assembler::fsubr(int i) {
4057   emit_farith(0xD8, 0xE8, i);
4058 }
4059 
4060 void Assembler::fsubr_d(Address src) {
4061   InstructionMark im(this);
4062   emit_int8((unsigned char)0xDC);
4063   emit_operand32(rbp, src);
4064 }
4065 
4066 void Assembler::fsubr_s(Address src) {
4067   InstructionMark im(this);
4068   emit_int8((unsigned char)0xD8);
4069   emit_operand32(rbp, src);
4070 }
4071 
4072 void Assembler::fsubra(int i) {
4073   emit_farith(0xDC, 0xE0, i);
4074 }
4075 
4076 void Assembler::fsubrp(int i) {
4077   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
4078 }
4079 
4080 void Assembler::ftan() {
4081   emit_int8((unsigned char)0xD9);
4082   emit_int8((unsigned char)0xF2);
4083   emit_int8((unsigned char)0xDD);
4084   emit_int8((unsigned char)0xD8);
4085 }
4086 
4087 void Assembler::ftst() {
4088   emit_int8((unsigned char)0xD9);
4089   emit_int8((unsigned char)0xE4);
4090 }
4091 
4092 void Assembler::fucomi(int i) {
4093   // make sure the instruction is supported (introduced for P6, together with cmov)
4094   guarantee(VM_Version::supports_cmov(), "illegal instruction");
4095   emit_farith(0xDB, 0xE8, i);
4096 }
4097 
4098 void Assembler::fucomip(int i) {
4099   // make sure the instruction is supported (introduced for P6, together with cmov)
4100   guarantee(VM_Version::supports_cmov(), "illegal instruction");
4101   emit_farith(0xDF, 0xE8, i);
4102 }
4103 
4104 void Assembler::fwait() {
4105   emit_int8((unsigned char)0x9B);
4106 }
4107 
4108 void Assembler::fxch(int i) {
4109   emit_farith(0xD9, 0xC8, i);
4110 }
4111 
4112 void Assembler::fyl2x() {
4113   emit_int8((unsigned char)0xD9);
4114   emit_int8((unsigned char)0xF1);
4115 }
4116 
4117 void Assembler::frndint() {
4118   emit_int8((unsigned char)0xD9);
4119   emit_int8((unsigned char)0xFC);
4120 }
4121 
4122 void Assembler::f2xm1() {
4123   emit_int8((unsigned char)0xD9);
4124   emit_int8((unsigned char)0xF0);
4125 }
4126 
4127 void Assembler::fldl2e() {
4128   emit_int8((unsigned char)0xD9);
4129   emit_int8((unsigned char)0xEA);
4130 }
4131 
// Lookup tables used when a VEX-style description has to be emitted as a legacy SSE encoding.
//
// simd_pre: indexed by the VexSimdPrefix value; gives the legacy SIMD prefix byte
// (entry 0 means "no prefix byte").
static int simd_pre[4] = {
  0x00,
  0x66,
  0xF3,
  0xF2
};
// simd_opc: indexed by the VexOpcode value; gives the opcode byte that follows the
// leading 0x0F (entries holding 0 mean "no extra byte").
static int simd_opc[4] = {
  0x00,
  0x00,
  0x38,
  0x3A
};
4136 
4137 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
4138 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
4139   if (pre > 0) {
4140     emit_int8(simd_pre[pre]);
4141   }
4142   if (rex_w) {
4143     prefixq(adr, xreg);
4144   } else {
4145     prefix(adr, xreg);
4146   }
4147   if (opc > 0) {
4148     emit_int8(0x0F);
4149     int opc2 = simd_opc[opc];
4150     if (opc2 > 0) {
4151       emit_int8(opc2);
4152     }
4153   }
4154 }
4155 
4156 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
4157   if (pre > 0) {
4158     emit_int8(simd_pre[pre]);
4159   }
4160   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
4161                           prefix_and_encode(dst_enc, src_enc);
4162   if (opc > 0) {
4163     emit_int8(0x0F);
4164     int opc2 = simd_opc[opc];
4165     if (opc2 > 0) {
4166       emit_int8(opc2);
4167     }
4168   }
4169   return encode;
4170 }
4171 
4172 
4173 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
4174   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
4175     prefix(VEX_3bytes);
4176 
4177     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
4178     byte1 = (~byte1) & 0xE0;
4179     byte1 |= opc;
4180     emit_int8(byte1);
4181 
4182     int byte2 = ((~nds_enc) & 0xf) << 3;
4183     byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
4184     emit_int8(byte2);
4185   } else {
4186     prefix(VEX_2bytes);
4187 
4188     int byte1 = vex_r ? VEX_R : 0;
4189     byte1 = (~byte1) & 0x80;
4190     byte1 |= ((~nds_enc) & 0xf) << 3;
4191     byte1 |= (vector256 ? 4 : 0) | pre;
4192     emit_int8(byte1);
4193   }
4194 }
4195 
4196 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
4197   bool vex_r = (xreg_enc >= 8);
4198   bool vex_b = adr.base_needs_rex();
4199   bool vex_x = adr.index_needs_rex();
4200   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
4201 }
4202 
4203 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
4204   bool vex_r = (dst_enc >= 8);
4205   bool vex_b = (src_enc >= 8);
4206   bool vex_x = false;
4207   vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
4208   return (((dst_enc & 7) << 3) | (src_enc & 7));
4209 }
4210 
4211 
4212 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
4213   if (UseAVX > 0) {
4214     int xreg_enc = xreg->encoding();
4215     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
4216     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
4217   } else {
4218     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
4219     rex_prefix(adr, xreg, pre, opc, rex_w);
4220   }
4221 }
4222 
4223 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
4224   int dst_enc = dst->encoding();
4225   int src_enc = src->encoding();
4226   if (UseAVX > 0) {
4227     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
4228     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
4229   } else {
4230     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
4231     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
4232   }
4233 }
4234 
4235 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
4236   InstructionMark im(this);
4237   simd_prefix(dst, dst, src, pre);
4238   emit_int8(opcode);
4239   emit_operand(dst, src);
4240 }
4241 
4242 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4243   int encode = simd_prefix_and_encode(dst, dst, src, pre);
4244   emit_int8(opcode);
4245   emit_int8((unsigned char)(0xC0 | encode));
4246 }
4247 
4248 // Versions with no second source register (non-destructive source).
4249 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
4250   InstructionMark im(this);
4251   simd_prefix(dst, xnoreg, src, pre);
4252   emit_int8(opcode);
4253   emit_operand(dst, src);
4254 }
4255 
4256 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
4257   int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
4258   emit_int8(opcode);
4259   emit_int8((unsigned char)(0xC0 | encode));
4260 }
4261 
4262 // 3-operands AVX instructions
4263 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4264                                Address src, VexSimdPrefix pre, bool vector256) {
4265   InstructionMark im(this);
4266   vex_prefix(dst, nds, src, pre, vector256);
4267   emit_int8(opcode);
4268   emit_operand(dst, src);
4269 }
4270 
4271 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
4272                                XMMRegister src, VexSimdPrefix pre, bool vector256) {
4273   int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
4274   emit_int8(opcode);
4275   emit_int8((unsigned char)(0xC0 | encode));
4276 }
4277 
4278 #ifndef _LP64
4279 
4280 void Assembler::incl(Register dst) {
4281   // Don't use it directly. Use MacroAssembler::incrementl() instead.
4282   emit_int8(0x40 | dst->encoding());
4283 }
4284 
// 32-bit build only (this definition sits inside #ifndef _LP64): lea forwards to leal.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}
4288 
4289 void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
4290   InstructionMark im(this);
4291   emit_int8((unsigned char)0xC7);
4292   emit_operand(rax, dst);
4293   emit_data((int)imm32, rspec, 0);
4294 }
4295 
4296 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
4297   InstructionMark im(this);
4298   int encode = prefix_and_encode(dst->encoding());
4299   emit_int8((unsigned char)(0xB8 | encode));
4300   emit_data((int)imm32, rspec, 0);
4301 }
4302 
4303 void Assembler::popa() { // 32bit
4304   emit_int8(0x61);
4305 }
4306 
4307 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
4308   InstructionMark im(this);
4309   emit_int8(0x68);
4310   emit_data(imm32, rspec, 0);
4311 }
4312 
4313 void Assembler::pusha() { // 32bit
4314   emit_int8(0x60);
4315 }
4316 
4317 void Assembler::set_byte_if_not_zero(Register dst) {
4318   emit_int8(0x0F);
4319   emit_int8((unsigned char)0x95);
4320   emit_int8((unsigned char)(0xE0 | dst->encoding()));
4321 }
4322 
4323 void Assembler::shldl(Register dst, Register src) {
4324   emit_int8(0x0F);
4325   emit_int8((unsigned char)0xA5);
4326   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
4327 }
4328 
4329 void Assembler::shrdl(Register dst, Register src) {
4330   emit_int8(0x0F);
4331   emit_int8((unsigned char)0xAD);
4332   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
4333 }
4334 
4335 #else // LP64
4336 
4337 void Assembler::set_byte_if_not_zero(Register dst) {
4338   int enc = prefix_and_encode(dst->encoding(), true);
4339   emit_int8(0x0F);
4340   emit_int8((unsigned char)0x95);
4341   emit_int8((unsigned char)(0xE0 | enc));
4342 }
4343 
4344 // 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative addressing;
// it cannot be used by instructions that want an immediate value.
4347 
4348 bool Assembler::reachable(AddressLiteral adr) {
4349   int64_t disp;
4350   // None will force a 64bit literal to the code stream. Likely a placeholder
4351   // for something that will be patched later and we need to certain it will
4352   // always be reachable.
4353   if (adr.reloc() == relocInfo::none) {
4354     return false;
4355   }
4356   if (adr.reloc() == relocInfo::internal_word_type) {
4357     // This should be rip relative and easily reachable.
4358     return true;
4359   }
4360   if (adr.reloc() == relocInfo::virtual_call_type ||
4361       adr.reloc() == relocInfo::opt_virtual_call_type ||
4362       adr.reloc() == relocInfo::static_call_type ||
4363       adr.reloc() == relocInfo::static_stub_type ) {
4364     // This should be rip relative within the code cache and easily
4365     // reachable until we get huge code caches. (At which point
4366     // ic code is going to have issues).
4367     return true;
4368   }
4369   if (adr.reloc() != relocInfo::external_word_type &&
4370       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
4371       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
4372       adr.reloc() != relocInfo::runtime_call_type ) {
4373     return false;
4374   }
4375 
4376   // Stress the correction code
4377   if (ForceUnreachable) {
4378     // Must be runtimecall reloc, see if it is in the codecache
4379     // Flipping stuff in the codecache to be unreachable causes issues
4380     // with things like inline caches where the additional instructions
4381     // are not handled.
4382     if (CodeCache::find_blob(adr._target) == NULL) {
4383       return false;
4384     }
4385   }
4386   // For external_word_type/runtime_call_type if it is reachable from where we
4387   // are now (possibly a temp buffer) and where we might end up
4388   // anywhere in the codeCache then we are always reachable.
4389   // This would have to change if we ever save/restore shared code
4390   // to be more pessimistic.
4391   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
4392   if (!is_simm32(disp)) return false;
4393   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
4394   if (!is_simm32(disp)) return false;
4395 
4396   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
4397 
4398   // Because rip relative is a disp + address_of_next_instruction and we
4399   // don't know the value of address_of_next_instruction we apply a fudge factor
4400   // to make sure we will be ok no matter the size of the instruction we get placed into.
4401   // We don't have to fudge the checks above here because they are already worst case.
4402 
4403   // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
4404   // + 4 because better safe than sorry.
4405   const int fudge = 12 + 4;
4406   if (disp < 0) {
4407     disp -= fudge;
4408   } else {
4409     disp += fudge;
4410   }
4411   return is_simm32(disp);
4412 }
4413 
4414 // Check if the polling page is not reachable from the code cache using rip-relative
4415 // addressing.
4416 bool Assembler::is_polling_page_far() {
4417   intptr_t addr = (intptr_t)os::get_polling_page();
4418   return ForceUnreachable ||
4419          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
4420          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
4421 }
4422 
4423 void Assembler::emit_data64(jlong data,
4424                             relocInfo::relocType rtype,
4425                             int format) {
4426   if (rtype == relocInfo::none) {
4427     emit_int64(data);
4428   } else {
4429     emit_data64(data, Relocation::spec_simple(rtype), format);
4430   }
4431 }
4432 
4433 void Assembler::emit_data64(jlong data,
4434                             RelocationHolder const& rspec,
4435                             int format) {
4436   assert(imm_operand == 0, "default format must be immediate in this file");
4437   assert(imm_operand == format, "must be immediate");
4438   assert(inst_mark() != NULL, "must be inside InstructionMark");
4439   // Do not use AbstractAssembler::relocate, which is not intended for
4440   // embedded words.  Instead, relocate to the enclosing instruction.
4441   code_section()->relocate(inst_mark(), rspec, format);
4442 #ifdef ASSERT
4443   check_relocation(rspec, format);
4444 #endif
4445   emit_int64(data);
4446 }
4447 
4448 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
4449   if (reg_enc >= 8) {
4450     prefix(REX_B);
4451     reg_enc -= 8;
4452   } else if (byteinst && reg_enc >= 4) {
4453     prefix(REX);
4454   }
4455   return reg_enc;
4456 }
4457 
4458 int Assembler::prefixq_and_encode(int reg_enc) {
4459   if (reg_enc < 8) {
4460     prefix(REX_W);
4461   } else {
4462     prefix(REX_WB);
4463     reg_enc -= 8;
4464   }
4465   return reg_enc;
4466 }
4467 
4468 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
4469   if (dst_enc < 8) {
4470     if (src_enc >= 8) {
4471       prefix(REX_B);
4472       src_enc -= 8;
4473     } else if (byteinst && src_enc >= 4) {
4474       prefix(REX);
4475     }
4476   } else {
4477     if (src_enc < 8) {
4478       prefix(REX_R);
4479     } else {
4480       prefix(REX_RB);
4481       src_enc -= 8;
4482     }
4483     dst_enc -= 8;
4484   }
4485   return dst_enc << 3 | src_enc;
4486 }
4487 
4488 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
4489   if (dst_enc < 8) {
4490     if (src_enc < 8) {
4491       prefix(REX_W);
4492     } else {
4493       prefix(REX_WB);
4494       src_enc -= 8;
4495     }
4496   } else {
4497     if (src_enc < 8) {
4498       prefix(REX_WR);
4499     } else {
4500       prefix(REX_WRB);
4501       src_enc -= 8;
4502     }
4503     dst_enc -= 8;
4504   }
4505   return dst_enc << 3 | src_enc;
4506 }
4507 
4508 void Assembler::prefix(Register reg) {
4509   if (reg->encoding() >= 8) {
4510     prefix(REX_B);
4511   }
4512 }
4513 
4514 void Assembler::prefix(Address adr) {
4515   if (adr.base_needs_rex()) {
4516     if (adr.index_needs_rex()) {
4517       prefix(REX_XB);
4518     } else {
4519       prefix(REX_B);
4520     }
4521   } else {
4522     if (adr.index_needs_rex()) {
4523       prefix(REX_X);
4524     }
4525   }
4526 }
4527 
4528 void Assembler::prefixq(Address adr) {
4529   if (adr.base_needs_rex()) {
4530     if (adr.index_needs_rex()) {
4531       prefix(REX_WXB);
4532     } else {
4533       prefix(REX_WB);
4534     }
4535   } else {
4536     if (adr.index_needs_rex()) {
4537       prefix(REX_WX);
4538     } else {
4539       prefix(REX_W);
4540     }
4541   }
4542 }
4543 
4544 
4545 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
4546   if (reg->encoding() < 8) {
4547     if (adr.base_needs_rex()) {
4548       if (adr.index_needs_rex()) {
4549         prefix(REX_XB);
4550       } else {
4551         prefix(REX_B);
4552       }
4553     } else {
4554       if (adr.index_needs_rex()) {
4555         prefix(REX_X);
4556       } else if (byteinst && reg->encoding() >= 4 ) {
4557         prefix(REX);
4558       }
4559     }
4560   } else {
4561     if (adr.base_needs_rex()) {
4562       if (adr.index_needs_rex()) {
4563         prefix(REX_RXB);
4564       } else {
4565         prefix(REX_RB);
4566       }
4567     } else {
4568       if (adr.index_needs_rex()) {
4569         prefix(REX_RX);
4570       } else {
4571         prefix(REX_R);
4572       }
4573     }
4574   }
4575 }
4576 
4577 void Assembler::prefixq(Address adr, Register src) {
4578   if (src->encoding() < 8) {
4579     if (adr.base_needs_rex()) {
4580       if (adr.index_needs_rex()) {
4581         prefix(REX_WXB);
4582       } else {
4583         prefix(REX_WB);
4584       }
4585     } else {
4586       if (adr.index_needs_rex()) {
4587         prefix(REX_WX);
4588       } else {
4589         prefix(REX_W);
4590       }
4591     }
4592   } else {
4593     if (adr.base_needs_rex()) {
4594       if (adr.index_needs_rex()) {
4595         prefix(REX_WRXB);
4596       } else {
4597         prefix(REX_WRB);
4598       }
4599     } else {
4600       if (adr.index_needs_rex()) {
4601         prefix(REX_WRX);
4602       } else {
4603         prefix(REX_WR);
4604       }
4605     }
4606   }
4607 }
4608 
4609 void Assembler::prefix(Address adr, XMMRegister reg) {
4610   if (reg->encoding() < 8) {
4611     if (adr.base_needs_rex()) {
4612       if (adr.index_needs_rex()) {
4613         prefix(REX_XB);
4614       } else {
4615         prefix(REX_B);
4616       }
4617     } else {
4618       if (adr.index_needs_rex()) {
4619         prefix(REX_X);
4620       }
4621     }
4622   } else {
4623     if (adr.base_needs_rex()) {
4624       if (adr.index_needs_rex()) {
4625         prefix(REX_RXB);
4626       } else {
4627         prefix(REX_RB);
4628       }
4629     } else {
4630       if (adr.index_needs_rex()) {
4631         prefix(REX_RX);
4632       } else {
4633         prefix(REX_R);
4634       }
4635     }
4636   }
4637 }
4638 
4639 void Assembler::prefixq(Address adr, XMMRegister src) {
4640   if (src->encoding() < 8) {
4641     if (adr.base_needs_rex()) {
4642       if (adr.index_needs_rex()) {
4643         prefix(REX_WXB);
4644       } else {
4645         prefix(REX_WB);
4646       }
4647     } else {
4648       if (adr.index_needs_rex()) {
4649         prefix(REX_WX);
4650       } else {
4651         prefix(REX_W);
4652       }
4653     }
4654   } else {
4655     if (adr.base_needs_rex()) {
4656       if (adr.index_needs_rex()) {
4657         prefix(REX_WRXB);
4658       } else {
4659         prefix(REX_WRB);
4660       }
4661     } else {
4662       if (adr.index_needs_rex()) {
4663         prefix(REX_WRX);
4664       } else {
4665         prefix(REX_WR);
4666       }
4667     }
4668   }
4669 }
4670 
4671 void Assembler::adcq(Register dst, int32_t imm32) {
4672   (void) prefixq_and_encode(dst->encoding());
4673   emit_arith(0x81, 0xD0, dst, imm32);
4674 }
4675 
4676 void Assembler::adcq(Register dst, Address src) {
4677   InstructionMark im(this);
4678   prefixq(src, dst);
4679   emit_int8(0x13);
4680   emit_operand(dst, src);
4681 }
4682 
4683 void Assembler::adcq(Register dst, Register src) {
4684   (int) prefixq_and_encode(dst->encoding(), src->encoding());
4685   emit_arith(0x13, 0xC0, dst, src);
4686 }
4687 
4688 void Assembler::addq(Address dst, int32_t imm32) {
4689   InstructionMark im(this);
4690   prefixq(dst);
4691   emit_arith_operand(0x81, rax, dst,imm32);
4692 }
4693 
4694 void Assembler::addq(Address dst, Register src) {
4695   InstructionMark im(this);
4696   prefixq(dst, src);
4697   emit_int8(0x01);
4698   emit_operand(src, dst);
4699 }
4700 
4701 void Assembler::addq(Register dst, int32_t imm32) {
4702   (void) prefixq_and_encode(dst->encoding());
4703   emit_arith(0x81, 0xC0, dst, imm32);
4704 }
4705 
4706 void Assembler::addq(Register dst, Address src) {
4707   InstructionMark im(this);
4708   prefixq(src, dst);
4709   emit_int8(0x03);
4710   emit_operand(dst, src);
4711 }
4712 
4713 void Assembler::addq(Register dst, Register src) {
4714   (void) prefixq_and_encode(dst->encoding(), src->encoding());
4715   emit_arith(0x03, 0xC0, dst, src);
4716 }
4717 
4718 void Assembler::andq(Address dst, int32_t imm32) {
4719   InstructionMark im(this);
4720   prefixq(dst);
4721   emit_int8((unsigned char)0x81);
4722   emit_operand(rsp, dst, 4);
4723   emit_long(imm32);
4724 }
4725 
4726 void Assembler::andq(Register dst, int32_t imm32) {
4727   (void) prefixq_and_encode(dst->encoding());
4728   emit_arith(0x81, 0xE0, dst, imm32);
4729 }
4730 
4731 void Assembler::andq(Register dst, Address src) {
4732   InstructionMark im(this);
4733   prefixq(src, dst);
4734   emit_int8(0x23);
4735   emit_operand(dst, src);
4736 }
4737 
4738 void Assembler::andq(Register dst, Register src) {
4739   (int) prefixq_and_encode(dst->encoding(), src->encoding());
4740   emit_arith(0x23, 0xC0, dst, src);
4741 }
4742 
4743 void Assembler::bsfq(Register dst, Register src) {
4744   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4745   emit_int8(0x0F);
4746   emit_int8((unsigned char)0xBC);
4747   emit_int8((unsigned char)(0xC0 | encode));
4748 }
4749 
4750 void Assembler::bsrq(Register dst, Register src) {
4751   assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
4752   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4753   emit_int8(0x0F);
4754   emit_int8((unsigned char)0xBD);
4755   emit_int8((unsigned char)(0xC0 | encode));
4756 }
4757 
4758 void Assembler::bswapq(Register reg) {
4759   int encode = prefixq_and_encode(reg->encoding());
4760   emit_int8(0x0F);
4761   emit_int8((unsigned char)(0xC8 | encode));
4762 }
4763 
4764 void Assembler::cdqq() {
4765   prefix(REX_W);
4766   emit_int8((unsigned char)0x99);
4767 }
4768 
4769 void Assembler::clflush(Address adr) {
4770   prefix(adr);
4771   emit_int8(0x0F);
4772   emit_int8((unsigned char)0xAE);
4773   emit_operand(rdi, adr);
4774 }
4775 
4776 void Assembler::cmovq(Condition cc, Register dst, Register src) {
4777   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4778   emit_int8(0x0F);
4779   emit_int8(0x40 | cc);
4780   emit_int8((unsigned char)(0xC0 | encode));
4781 }
4782 
4783 void Assembler::cmovq(Condition cc, Register dst, Address src) {
4784   InstructionMark im(this);
4785   prefixq(src, dst);
4786   emit_int8(0x0F);
4787   emit_int8(0x40 | cc);
4788   emit_operand(dst, src);
4789 }
4790 
4791 void Assembler::cmpq(Address dst, int32_t imm32) {
4792   InstructionMark im(this);
4793   prefixq(dst);
4794   emit_int8((unsigned char)0x81);
4795   emit_operand(rdi, dst, 4);
4796   emit_long(imm32);
4797 }
4798 
4799 void Assembler::cmpq(Register dst, int32_t imm32) {
4800   (void) prefixq_and_encode(dst->encoding());
4801   emit_arith(0x81, 0xF8, dst, imm32);
4802 }
4803 
4804 void Assembler::cmpq(Address dst, Register src) {
4805   InstructionMark im(this);
4806   prefixq(dst, src);
4807   emit_int8(0x3B);
4808   emit_operand(src, dst);
4809 }
4810 
4811 void Assembler::cmpq(Register dst, Register src) {
4812   (void) prefixq_and_encode(dst->encoding(), src->encoding());
4813   emit_arith(0x3B, 0xC0, dst, src);
4814 }
4815 
4816 void Assembler::cmpq(Register dst, Address  src) {
4817   InstructionMark im(this);
4818   prefixq(src, dst);
4819   emit_int8(0x3B);
4820   emit_operand(dst, src);
4821 }
4822 
4823 void Assembler::cmpxchgq(Register reg, Address adr) {
4824   InstructionMark im(this);
4825   prefixq(adr, reg);
4826   emit_int8(0x0F);
4827   emit_int8((unsigned char)0xB1);
4828   emit_operand(reg, adr);
4829 }
4830 
4831 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
4832   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4833   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
4834   emit_int8(0x2A);
4835   emit_int8((unsigned char)(0xC0 | encode));
4836 }
4837 
4838 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
4839   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4840   InstructionMark im(this);
4841   simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
4842   emit_int8(0x2A);
4843   emit_operand(dst, src);
4844 }
4845 
4846 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
4847   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4848   int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
4849   emit_int8(0x2A);
4850   emit_int8((unsigned char)(0xC0 | encode));
4851 }
4852 
4853 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
4854   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4855   InstructionMark im(this);
4856   simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
4857   emit_int8(0x2A);
4858   emit_operand(dst, src);
4859 }
4860 
4861 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
4862   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4863   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
4864   emit_int8(0x2C);
4865   emit_int8((unsigned char)(0xC0 | encode));
4866 }
4867 
4868 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
4869   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4870   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
4871   emit_int8(0x2C);
4872   emit_int8((unsigned char)(0xC0 | encode));
4873 }
4874 
4875 void Assembler::decl(Register dst) {
4876   // Don't use it directly. Use MacroAssembler::decrementl() instead.
4877   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
4878   int encode = prefix_and_encode(dst->encoding());
4879   emit_int8((unsigned char)0xFF);
4880   emit_int8((unsigned char)(0xC8 | encode));
4881 }
4882 
4883 void Assembler::decq(Register dst) {
4884   // Don't use it directly. Use MacroAssembler::decrementq() instead.
4885   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
4886   int encode = prefixq_and_encode(dst->encoding());
4887   emit_int8((unsigned char)0xFF);
4888   emit_int8(0xC8 | encode);
4889 }
4890 
4891 void Assembler::decq(Address dst) {
4892   // Don't use it directly. Use MacroAssembler::decrementq() instead.
4893   InstructionMark im(this);
4894   prefixq(dst);
4895   emit_int8((unsigned char)0xFF);
4896   emit_operand(rcx, dst);
4897 }
4898 
4899 void Assembler::fxrstor(Address src) {
4900   prefixq(src);
4901   emit_int8(0x0F);
4902   emit_int8((unsigned char)0xAE);
4903   emit_operand(as_Register(1), src);
4904 }
4905 
4906 void Assembler::fxsave(Address dst) {
4907   prefixq(dst);
4908   emit_int8(0x0F);
4909   emit_int8((unsigned char)0xAE);
4910   emit_operand(as_Register(0), dst);
4911 }
4912 
4913 void Assembler::idivq(Register src) {
4914   int encode = prefixq_and_encode(src->encoding());
4915   emit_int8((unsigned char)0xF7);
4916   emit_int8((unsigned char)(0xF8 | encode));
4917 }
4918 
4919 void Assembler::imulq(Register dst, Register src) {
4920   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4921   emit_int8(0x0F);
4922   emit_int8((unsigned char)0xAF);
4923   emit_int8((unsigned char)(0xC0 | encode));
4924 }
4925 
4926 void Assembler::imulq(Register dst, Register src, int value) {
4927   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4928   if (is8bit(value)) {
4929     emit_int8(0x6B);
4930     emit_int8((unsigned char)(0xC0 | encode));
4931     emit_int8(value & 0xFF);
4932   } else {
4933     emit_int8(0x69);
4934     emit_int8((unsigned char)(0xC0 | encode));
4935     emit_long(value);
4936   }
4937 }
4938 
4939 void Assembler::incl(Register dst) {
4940   // Don't use it directly. Use MacroAssembler::incrementl() instead.
4941   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
4942   int encode = prefix_and_encode(dst->encoding());
4943   emit_int8((unsigned char)0xFF);
4944   emit_int8((unsigned char)(0xC0 | encode));
4945 }
4946 
4947 void Assembler::incq(Register dst) {
4948   // Don't use it directly. Use MacroAssembler::incrementq() instead.
4949   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
4950   int encode = prefixq_and_encode(dst->encoding());
4951   emit_int8((unsigned char)0xFF);
4952   emit_int8((unsigned char)(0xC0 | encode));
4953 }
4954 
4955 void Assembler::incq(Address dst) {
4956   // Don't use it directly. Use MacroAssembler::incrementq() instead.
4957   InstructionMark im(this);
4958   prefixq(dst);
4959   emit_int8((unsigned char)0xFF);
4960   emit_operand(rax, dst);
4961 }
4962 
4963 void Assembler::lea(Register dst, Address src) {
4964   leaq(dst, src);
4965 }
4966 
4967 void Assembler::leaq(Register dst, Address src) {
4968   InstructionMark im(this);
4969   prefixq(src, dst);
4970   emit_int8((unsigned char)0x8D);
4971   emit_operand(dst, src);
4972 }
4973 
4974 void Assembler::mov64(Register dst, int64_t imm64) {
4975   InstructionMark im(this);
4976   int encode = prefixq_and_encode(dst->encoding());
4977   emit_int8((unsigned char)(0xB8 | encode));
4978   emit_int64(imm64);
4979 }
4980 
4981 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
4982   InstructionMark im(this);
4983   int encode = prefixq_and_encode(dst->encoding());
4984   emit_int8(0xB8 | encode);
4985   emit_data64(imm64, rspec);
4986 }
4987 
4988 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
4989   InstructionMark im(this);
4990   int encode = prefix_and_encode(dst->encoding());
4991   emit_int8((unsigned char)(0xB8 | encode));
4992   emit_data((int)imm32, rspec, narrow_oop_operand);
4993 }
4994 
4995 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
4996   InstructionMark im(this);
4997   prefix(dst);
4998   emit_int8((unsigned char)0xC7);
4999   emit_operand(rax, dst, 4);
5000   emit_data((int)imm32, rspec, narrow_oop_operand);
5001 }
5002 
5003 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
5004   InstructionMark im(this);
5005   int encode = prefix_and_encode(src1->encoding());
5006   emit_int8((unsigned char)0x81);
5007   emit_int8((unsigned char)(0xF8 | encode));
5008   emit_data((int)imm32, rspec, narrow_oop_operand);
5009 }
5010 
5011 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
5012   InstructionMark im(this);
5013   prefix(src1);
5014   emit_int8((unsigned char)0x81);
5015   emit_operand(rax, src1, 4);
5016   emit_data((int)imm32, rspec, narrow_oop_operand);
5017 }
5018 
5019 void Assembler::lzcntq(Register dst, Register src) {
5020   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
5021   emit_int8((unsigned char)0xF3);
5022   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5023   emit_int8(0x0F);
5024   emit_int8((unsigned char)0xBD);
5025   emit_int8((unsigned char)(0xC0 | encode));
5026 }
5027 
5028 void Assembler::movdq(XMMRegister dst, Register src) {
5029   // table D-1 says MMX/SSE2
5030   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5031   int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
5032   emit_int8(0x6E);
5033   emit_int8((unsigned char)(0xC0 | encode));
5034 }
5035 
5036 void Assembler::movdq(Register dst, XMMRegister src) {
5037   // table D-1 says MMX/SSE2
5038   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5039   // swap src/dst to get correct prefix
5040   int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
5041   emit_int8(0x7E);
5042   emit_int8((unsigned char)(0xC0 | encode));
5043 }
5044 
5045 void Assembler::movq(Register dst, Register src) {
5046   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5047   emit_int8((unsigned char)0x8B);
5048   emit_int8((unsigned char)(0xC0 | encode));
5049 }
5050 
5051 void Assembler::movq(Register dst, Address src) {
5052   InstructionMark im(this);
5053   prefixq(src, dst);
5054   emit_int8((unsigned char)0x8B);
5055   emit_operand(dst, src);
5056 }
5057 
5058 void Assembler::movq(Address dst, Register src) {
5059   InstructionMark im(this);
5060   prefixq(dst, src);
5061   emit_int8((unsigned char)0x89);
5062   emit_operand(src, dst);
5063 }
5064 
5065 void Assembler::movsbq(Register dst, Address src) {
5066   InstructionMark im(this);
5067   prefixq(src, dst);
5068   emit_int8(0x0F);
5069   emit_int8((unsigned char)0xBE);
5070   emit_operand(dst, src);
5071 }
5072 
5073 void Assembler::movsbq(Register dst, Register src) {
5074   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5075   emit_int8(0x0F);
5076   emit_int8((unsigned char)0xBE);
5077   emit_int8((unsigned char)(0xC0 | encode));
5078 }
5079 
5080 void Assembler::movslq(Register dst, int32_t imm32) {
5081   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
5082   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
5083   // as a result we shouldn't use until tested at runtime...
5084   ShouldNotReachHere();
5085   InstructionMark im(this);
5086   int encode = prefixq_and_encode(dst->encoding());
5087   emit_int8((unsigned char)(0xC7 | encode));
5088   emit_long(imm32);
5089 }
5090 
5091 void Assembler::movslq(Address dst, int32_t imm32) {
5092   assert(is_simm32(imm32), "lost bits");
5093   InstructionMark im(this);
5094   prefixq(dst);
5095   emit_int8((unsigned char)0xC7);
5096   emit_operand(rax, dst, 4);
5097   emit_long(imm32);
5098 }
5099 
5100 void Assembler::movslq(Register dst, Address src) {
5101   InstructionMark im(this);
5102   prefixq(src, dst);
5103   emit_int8(0x63);
5104   emit_operand(dst, src);
5105 }
5106 
5107 void Assembler::movslq(Register dst, Register src) {
5108   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5109   emit_int8(0x63);
5110   emit_int8((unsigned char)(0xC0 | encode));
5111 }
5112 
5113 void Assembler::movswq(Register dst, Address src) {
5114   InstructionMark im(this);
5115   prefixq(src, dst);
5116   emit_int8(0x0F);
5117   emit_int8((unsigned char)0xBF);
5118   emit_operand(dst, src);
5119 }
5120 
5121 void Assembler::movswq(Register dst, Register src) {
5122   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5123   emit_int8((unsigned char)0x0F);
5124   emit_int8((unsigned char)0xBF);
5125   emit_int8((unsigned char)(0xC0 | encode));
5126 }
5127 
5128 void Assembler::movzbq(Register dst, Address src) {
5129   InstructionMark im(this);
5130   prefixq(src, dst);
5131   emit_int8((unsigned char)0x0F);
5132   emit_int8((unsigned char)0xB6);
5133   emit_operand(dst, src);
5134 }
5135 
5136 void Assembler::movzbq(Register dst, Register src) {
5137   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5138   emit_int8(0x0F);
5139   emit_int8((unsigned char)0xB6);
5140   emit_int8(0xC0 | encode);
5141 }
5142 
5143 void Assembler::movzwq(Register dst, Address src) {
5144   InstructionMark im(this);
5145   prefixq(src, dst);
5146   emit_int8((unsigned char)0x0F);
5147   emit_int8((unsigned char)0xB7);
5148   emit_operand(dst, src);
5149 }
5150 
5151 void Assembler::movzwq(Register dst, Register src) {
5152   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5153   emit_int8((unsigned char)0x0F);
5154   emit_int8((unsigned char)0xB7);
5155   emit_int8((unsigned char)(0xC0 | encode));
5156 }
5157 
5158 void Assembler::negq(Register dst) {
5159   int encode = prefixq_and_encode(dst->encoding());
5160   emit_int8((unsigned char)0xF7);
5161   emit_int8((unsigned char)(0xD8 | encode));
5162 }
5163 
5164 void Assembler::notq(Register dst) {
5165   int encode = prefixq_and_encode(dst->encoding());
5166   emit_int8((unsigned char)0xF7);
5167   emit_int8((unsigned char)(0xD0 | encode));
5168 }
5169 
5170 void Assembler::orq(Address dst, int32_t imm32) {
5171   InstructionMark im(this);
5172   prefixq(dst);
5173   emit_int8((unsigned char)0x81);
5174   emit_operand(rcx, dst, 4);
5175   emit_long(imm32);
5176 }
5177 
5178 void Assembler::orq(Register dst, int32_t imm32) {
5179   (void) prefixq_and_encode(dst->encoding());
5180   emit_arith(0x81, 0xC8, dst, imm32);
5181 }
5182 
5183 void Assembler::orq(Register dst, Address src) {
5184   InstructionMark im(this);
5185   prefixq(src, dst);
5186   emit_int8(0x0B);
5187   emit_operand(dst, src);
5188 }
5189 
5190 void Assembler::orq(Register dst, Register src) {
5191   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5192   emit_arith(0x0B, 0xC0, dst, src);
5193 }
5194 
5195 void Assembler::popa() { // 64bit
5196   movq(r15, Address(rsp, 0));
5197   movq(r14, Address(rsp, wordSize));
5198   movq(r13, Address(rsp, 2 * wordSize));
5199   movq(r12, Address(rsp, 3 * wordSize));
5200   movq(r11, Address(rsp, 4 * wordSize));
5201   movq(r10, Address(rsp, 5 * wordSize));
5202   movq(r9,  Address(rsp, 6 * wordSize));
5203   movq(r8,  Address(rsp, 7 * wordSize));
5204   movq(rdi, Address(rsp, 8 * wordSize));
5205   movq(rsi, Address(rsp, 9 * wordSize));
5206   movq(rbp, Address(rsp, 10 * wordSize));
5207   // skip rsp
5208   movq(rbx, Address(rsp, 12 * wordSize));
5209   movq(rdx, Address(rsp, 13 * wordSize));
5210   movq(rcx, Address(rsp, 14 * wordSize));
5211   movq(rax, Address(rsp, 15 * wordSize));
5212 
5213   addq(rsp, 16 * wordSize);
5214 }
5215 
5216 void Assembler::popcntq(Register dst, Address src) {
5217   assert(VM_Version::supports_popcnt(), "must support");
5218   InstructionMark im(this);
5219   emit_int8((unsigned char)0xF3);
5220   prefixq(src, dst);
5221   emit_int8((unsigned char)0x0F);
5222   emit_int8((unsigned char)0xB8);
5223   emit_operand(dst, src);
5224 }
5225 
5226 void Assembler::popcntq(Register dst, Register src) {
5227   assert(VM_Version::supports_popcnt(), "must support");
5228   emit_int8((unsigned char)0xF3);
5229   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5230   emit_int8((unsigned char)0x0F);
5231   emit_int8((unsigned char)0xB8);
5232   emit_int8((unsigned char)(0xC0 | encode));
5233 }
5234 
5235 void Assembler::popq(Address dst) {
5236   InstructionMark im(this);
5237   prefixq(dst);
5238   emit_int8((unsigned char)0x8F);
5239   emit_operand(rax, dst);
5240 }
5241 
5242 void Assembler::pusha() { // 64bit
5243   // we have to store original rsp.  ABI says that 128 bytes
5244   // below rsp are local scratch.
5245   movq(Address(rsp, -5 * wordSize), rsp);
5246 
5247   subq(rsp, 16 * wordSize);
5248 
5249   movq(Address(rsp, 15 * wordSize), rax);
5250   movq(Address(rsp, 14 * wordSize), rcx);
5251   movq(Address(rsp, 13 * wordSize), rdx);
5252   movq(Address(rsp, 12 * wordSize), rbx);
5253   // skip rsp
5254   movq(Address(rsp, 10 * wordSize), rbp);
5255   movq(Address(rsp, 9 * wordSize), rsi);
5256   movq(Address(rsp, 8 * wordSize), rdi);
5257   movq(Address(rsp, 7 * wordSize), r8);
5258   movq(Address(rsp, 6 * wordSize), r9);
5259   movq(Address(rsp, 5 * wordSize), r10);
5260   movq(Address(rsp, 4 * wordSize), r11);
5261   movq(Address(rsp, 3 * wordSize), r12);
5262   movq(Address(rsp, 2 * wordSize), r13);
5263   movq(Address(rsp, wordSize), r14);
5264   movq(Address(rsp, 0), r15);
5265 }
5266 
5267 void Assembler::pushq(Address src) {
5268   InstructionMark im(this);
5269   prefixq(src);
5270   emit_int8((unsigned char)0xFF);
5271   emit_operand(rsi, src);
5272 }
5273 
5274 void Assembler::rclq(Register dst, int imm8) {
5275   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5276   int encode = prefixq_and_encode(dst->encoding());
5277   if (imm8 == 1) {
5278     emit_int8((unsigned char)0xD1);
5279     emit_int8((unsigned char)(0xD0 | encode));
5280   } else {
5281     emit_int8((unsigned char)0xC1);
5282     emit_int8((unsigned char)(0xD0 | encode));
5283     emit_int8(imm8);
5284   }
5285 }
5286 void Assembler::sarq(Register dst, int imm8) {
5287   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5288   int encode = prefixq_and_encode(dst->encoding());
5289   if (imm8 == 1) {
5290     emit_int8((unsigned char)0xD1);
5291     emit_int8((unsigned char)(0xF8 | encode));
5292   } else {
5293     emit_int8((unsigned char)0xC1);
5294     emit_int8((unsigned char)(0xF8 | encode));
5295     emit_int8(imm8);
5296   }
5297 }
5298 
5299 void Assembler::sarq(Register dst) {
5300   int encode = prefixq_and_encode(dst->encoding());
5301   emit_int8((unsigned char)0xD3);
5302   emit_int8((unsigned char)(0xF8 | encode));
5303 }
5304 
5305 void Assembler::sbbq(Address dst, int32_t imm32) {
5306   InstructionMark im(this);
5307   prefixq(dst);
5308   emit_arith_operand(0x81, rbx, dst, imm32);
5309 }
5310 
5311 void Assembler::sbbq(Register dst, int32_t imm32) {
5312   (void) prefixq_and_encode(dst->encoding());
5313   emit_arith(0x81, 0xD8, dst, imm32);
5314 }
5315 
5316 void Assembler::sbbq(Register dst, Address src) {
5317   InstructionMark im(this);
5318   prefixq(src, dst);
5319   emit_int8(0x1B);
5320   emit_operand(dst, src);
5321 }
5322 
5323 void Assembler::sbbq(Register dst, Register src) {
5324   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5325   emit_arith(0x1B, 0xC0, dst, src);
5326 }
5327 
5328 void Assembler::shlq(Register dst, int imm8) {
5329   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5330   int encode = prefixq_and_encode(dst->encoding());
5331   if (imm8 == 1) {
5332     emit_int8((unsigned char)0xD1);
5333     emit_int8((unsigned char)(0xE0 | encode));
5334   } else {
5335     emit_int8((unsigned char)0xC1);
5336     emit_int8((unsigned char)(0xE0 | encode));
5337     emit_int8(imm8);
5338   }
5339 }
5340 
5341 void Assembler::shlq(Register dst) {
5342   int encode = prefixq_and_encode(dst->encoding());
5343   emit_int8((unsigned char)0xD3);
5344   emit_int8((unsigned char)(0xE0 | encode));
5345 }
5346 
5347 void Assembler::shrq(Register dst, int imm8) {
5348   assert(isShiftCount(imm8 >> 1), "illegal shift count");
5349   int encode = prefixq_and_encode(dst->encoding());
5350   emit_int8((unsigned char)0xC1);
5351   emit_int8((unsigned char)(0xE8 | encode));
5352   emit_int8(imm8);
5353 }
5354 
5355 void Assembler::shrq(Register dst) {
5356   int encode = prefixq_and_encode(dst->encoding());
5357   emit_int8((unsigned char)0xD3);
5358   emit_int8(0xE8 | encode);
5359 }
5360 
5361 void Assembler::subq(Address dst, int32_t imm32) {
5362   InstructionMark im(this);
5363   prefixq(dst);
5364   emit_arith_operand(0x81, rbp, dst, imm32);
5365 }
5366 
5367 void Assembler::subq(Address dst, Register src) {
5368   InstructionMark im(this);
5369   prefixq(dst, src);
5370   emit_int8(0x29);
5371   emit_operand(src, dst);
5372 }
5373 
5374 void Assembler::subq(Register dst, int32_t imm32) {
5375   (void) prefixq_and_encode(dst->encoding());
5376   emit_arith(0x81, 0xE8, dst, imm32);
5377 }
5378 
5379 // Force generation of a 4 byte immediate value even if it fits into 8bit
5380 void Assembler::subq_imm32(Register dst, int32_t imm32) {
5381   (void) prefixq_and_encode(dst->encoding());
5382   emit_arith_imm32(0x81, 0xE8, dst, imm32);
5383 }
5384 
5385 void Assembler::subq(Register dst, Address src) {
5386   InstructionMark im(this);
5387   prefixq(src, dst);
5388   emit_int8(0x2B);
5389   emit_operand(dst, src);
5390 }
5391 
5392 void Assembler::subq(Register dst, Register src) {
5393   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5394   emit_arith(0x2B, 0xC0, dst, src);
5395 }
5396 
5397 void Assembler::testq(Register dst, int32_t imm32) {
5398   // not using emit_arith because test
5399   // doesn't support sign-extension of
5400   // 8bit operands
5401   int encode = dst->encoding();
5402   if (encode == 0) {
5403     prefix(REX_W);
5404     emit_int8((unsigned char)0xA9);
5405   } else {
5406     encode = prefixq_and_encode(encode);
5407     emit_int8((unsigned char)0xF7);
5408     emit_int8((unsigned char)(0xC0 | encode));
5409   }
5410   emit_long(imm32);
5411 }
5412 
5413 void Assembler::testq(Register dst, Register src) {
5414   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5415   emit_arith(0x85, 0xC0, dst, src);
5416 }
5417 
5418 void Assembler::xaddq(Address dst, Register src) {
5419   InstructionMark im(this);
5420   prefixq(dst, src);
5421   emit_int8(0x0F);
5422   emit_int8((unsigned char)0xC1);
5423   emit_operand(src, dst);
5424 }
5425 
5426 void Assembler::xchgq(Register dst, Address src) {
5427   InstructionMark im(this);
5428   prefixq(src, dst);
5429   emit_int8((unsigned char)0x87);
5430   emit_operand(dst, src);
5431 }
5432 
5433 void Assembler::xchgq(Register dst, Register src) {
5434   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
5435   emit_int8((unsigned char)0x87);
5436   emit_int8((unsigned char)(0xc0 | encode));
5437 }
5438 
5439 void Assembler::xorq(Register dst, Register src) {
5440   (void) prefixq_and_encode(dst->encoding(), src->encoding());
5441   emit_arith(0x33, 0xC0, dst, src);
5442 }
5443 
5444 void Assembler::xorq(Register dst, Address src) {
5445   InstructionMark im(this);
5446   prefixq(src, dst);
5447   emit_int8(0x33);
5448   emit_operand(dst, src);
5449 }
5450 
5451 #endif // !LP64