/*
 * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "c1/c1_Compilation.hpp"
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "c1/c1_Runtime1.hpp"
#include "c1/c1_ValueStack.hpp"
#include "ci/ciArrayKlass.hpp"
#include "ci/ciInstance.hpp"
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "nativeInst_x86.hpp"
#include "oops/objArrayKlass.hpp"
#include "runtime/sharedRuntime.hpp"
#include "vmreg_x86.inline.hpp"


// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((intptr_t)adr) & ((intptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}
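// For example, if adr is 0x1018, masking with ~0xF rounds it down to the
// 16-byte aligned address 0x1010. Since the mask rounds down, the pool
// below reserves one extra 128-bit slot and hands out addresses starting
// at index 1, so the aligned operand always stays inside the buffer.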

// Buffer for 128-bit masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2],         CONST64(0x7FFFFFFF7FFFFFFF),         CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2],         CONST64(0x7FFFFFFFFFFFFFFF),         CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], (jlong)UCONST64(0x8000000080000000), (jlong)UCONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], (jlong)UCONST64(0x8000000000000000), (jlong)UCONST64(0x8000000000000000));



NEEDS_CLEANUP // remove these definitions?
const Register IC_Klass    = rax;   // where the IC klass is cached
const Register SYNC_header = rax;   // synchronization header
const Register SHIFT_count = rcx;   // where count for shift operations must be

#define __ _masm->


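// If one of the caller's temp registers aliases 'preserve', substitute
// 'extra' for it so that the temps handed back are guaranteed distinct
// from the register that must be preserved.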
static void select_different_registers(Register preserve,
                                       Register extra,
                                       Register &tmp1,
                                       Register &tmp2) {
  if (tmp1 == preserve) {
    assert_different_registers(tmp1, tmp2, extra);
    tmp1 = extra;
  } else if (tmp2 == preserve) {
    assert_different_registers(tmp1, tmp2, extra);
    tmp2 = extra;
  }
  assert_different_registers(preserve, tmp1, tmp2);
}



static void select_different_registers(Register preserve,
                                       Register extra,
                                       Register &tmp1,
                                       Register &tmp2,
                                       Register &tmp3) {
  if (tmp1 == preserve) {
    assert_different_registers(tmp1, tmp2, tmp3, extra);
    tmp1 = extra;
  } else if (tmp2 == preserve) {
    assert_different_registers(tmp1, tmp2, tmp3, extra);
    tmp2 = extra;
  } else if (tmp3 == preserve) {
    assert_different_registers(tmp1, tmp2, tmp3, extra);
    tmp3 = extra;
  }
  assert_different_registers(preserve, tmp1, tmp2, tmp3);
}



bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
  if (opr->is_constant()) {
    LIR_Const* constant = opr->as_constant_ptr();
    switch (constant->type()) {
      case T_INT: {
        return true;
      }

      default:
        return false;
    }
  }
  return false;
}


LIR_Opr LIR_Assembler::receiverOpr() {
  return FrameMap::receiver_opr;
}

LIR_Opr LIR_Assembler::osrBufferPointer() {
  return FrameMap::as_pointer_opr(receiverOpr()->as_register());
}

//--------------fpu register translations-----------------------


address LIR_Assembler::float_constant(float f) {
  address const_addr = __ float_constant(f);
  if (const_addr == NULL) {
    bailout("const section overflow");
    return __ code()->consts()->start();
  } else {
    return const_addr;
  }
}


address LIR_Assembler::double_constant(double d) {
  address const_addr = __ double_constant(d);
  if (const_addr == NULL) {
    bailout("const section overflow");
    return __ code()->consts()->start();
  } else {
    return const_addr;
  }
}


void LIR_Assembler::set_24bit_FPU() {
  __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
}

void LIR_Assembler::reset_FPU() {
  __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
}

void LIR_Assembler::fpop() {
  __ fpop();
}

void LIR_Assembler::fxch(int i) {
  __ fxch(i);
}

void LIR_Assembler::fld(int i) {
  __ fld_s(i);
}

void LIR_Assembler::ffree(int i) {
  __ ffree(i);
}

void LIR_Assembler::breakpoint() {
  __ int3();
}

void LIR_Assembler::push(LIR_Opr opr) {
  if (opr->is_single_cpu()) {
    __ push_reg(opr->as_register());
  } else if (opr->is_double_cpu()) {
    NOT_LP64(__ push_reg(opr->as_register_hi()));
    __ push_reg(opr->as_register_lo());
  } else if (opr->is_stack()) {
    __ push_addr(frame_map()->address_for_slot(opr->single_stack_ix()));
  } else if (opr->is_constant()) {
    LIR_Const* const_opr = opr->as_constant_ptr();
    if (const_opr->type() == T_OBJECT) {
      __ push_oop(const_opr->as_jobject());
    } else if (const_opr->type() == T_INT) {
      __ push_jint(const_opr->as_jint());
    } else {
      ShouldNotReachHere();
    }

  } else {
    ShouldNotReachHere();
  }
}

void LIR_Assembler::pop(LIR_Opr opr) {
  if (opr->is_single_cpu()) {
    __ pop_reg(opr->as_register());
  } else {
    ShouldNotReachHere();
  }
}

bool LIR_Assembler::is_literal_address(LIR_Address* addr) {
  return addr->base()->is_illegal() && addr->index()->is_illegal();
}

//-------------------------------------------

Address LIR_Assembler::as_Address(LIR_Address* addr) {
  return as_Address(addr, rscratch1);
}

Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) {
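  // A literal address (no base and no index) may not be RIP-reachable on
  // 64-bit; in that case, materialize it in tmp and address through it.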
  if (addr->base()->is_illegal()) {
    assert(addr->index()->is_illegal(), "must be illegal too");
    AddressLiteral laddr((address)addr->disp(), relocInfo::none);
    if (! __ reachable(laddr)) {
      __ movptr(tmp, laddr.addr());
      Address res(tmp, 0);
      return res;
    } else {
      return __ as_Address(laddr);
    }
  }

  Register base = addr->base()->as_pointer_register();

  if (addr->index()->is_illegal()) {
    return Address( base, addr->disp());
  } else if (addr->index()->is_cpu_register()) {
    Register index = addr->index()->as_pointer_register();
    return Address(base, index, (Address::ScaleFactor) addr->scale(), addr->disp());
  } else if (addr->index()->is_constant()) {
    intptr_t addr_offset = (addr->index()->as_constant_ptr()->as_jint() << addr->scale()) + addr->disp();
    assert(Assembler::is_simm32(addr_offset), "must be");

    return Address(base, addr_offset);
  } else {
    Unimplemented();
    return Address();
  }
}


Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
  Address base = as_Address(addr);
  return Address(base._base, base._index, base._scale, base._disp + BytesPerWord);
}


Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
  return as_Address(addr);
}


void LIR_Assembler::osr_entry() {
  offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
  BlockBegin* osr_entry = compilation()->hir()->osr_entry();
  ValueStack* entry_state = osr_entry->state();
  int number_of_locks = entry_state->locks_size();

  // we jump here if osr happens with the interpreter
  // state set up to continue at the beginning of the
  // loop that triggered osr - in particular, we have
  // the following registers setup:
  //
  // rcx: osr buffer
  //

  // build frame
  ciMethod* m = compilation()->method();
  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());

  // OSR buffer is
  //
  // locals[nlocals-1..0]
  // monitors[0..number_of_locks]
  //
  // locals is a direct copy of the interpreter frame, so the first slot
  // in the locals array of the osr buffer is the last local from the
  // interpreter and the last slot is local[0] (receiver) from the
  // interpreter
  //
  // Similarly with locks. The first lock slot in the osr buffer is the
  // nth lock from the interpreter frame; the nth lock slot in the osr
  // buffer is the 0th lock in the interpreter frame (the method lock if
  // a sync method)

  // Initialize monitors in the compiled activation.
  //   rcx: pointer to osr buffer
  //
  // All other registers are dead at this point and the locals will be
  // copied into place by code emitted in the IR.

  Register OSR_buf = osrBufferPointer()->as_pointer_register();
  { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
    int monitor_offset = BytesPerWord * method()->max_locals() +
      (2 * BytesPerWord) * (number_of_locks - 1);
    // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
    // the OSR buffer using 2 word entries: first the lock and then
    // the oop.
    for (int i = 0; i < number_of_locks; i++) {
      int slot_offset = monitor_offset - ((i * 2) * BytesPerWord);
#ifdef ASSERT
      // verify the interpreter's monitor has a non-null object
      {
        Label L;
        __ cmpptr(Address(OSR_buf, slot_offset + 1*BytesPerWord), (int32_t)NULL_WORD);
        __ jcc(Assembler::notZero, L);
        __ stop("locked object is NULL");
        __ bind(L);
      }
#endif
      __ movptr(rbx, Address(OSR_buf, slot_offset + 0));
      __ movptr(frame_map()->address_for_monitor_lock(i), rbx);
      __ movptr(rbx, Address(OSR_buf, slot_offset + 1*BytesPerWord));
      __ movptr(frame_map()->address_for_monitor_object(i), rbx);
    }
  }
}


// inline cache check; done before the frame is built.
int LIR_Assembler::check_icache() {
  Register receiver = FrameMap::receiver_opr->as_register();
  Register ic_klass = IC_Klass;
  const int ic_cmp_size = LP64_ONLY(10) NOT_LP64(9);
  const bool do_post_padding = VerifyOops || UseCompressedClassPointers;
  if (!do_post_padding) {
    // insert some nops so that the verified entry point is aligned on CodeEntryAlignment
    __ align(CodeEntryAlignment, __ offset() + ic_cmp_size);
  }
  int offset = __ offset();
  __ inline_cache_check(receiver, IC_Klass);
  assert(__ offset() % CodeEntryAlignment == 0 || do_post_padding, "alignment must be correct");
  if (do_post_padding) {
    // force alignment after the cache check.
    // It's been verified to be aligned if !VerifyOops
    __ align(CodeEntryAlignment);
  }
  return offset;
}


void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo* info) {
  jobject o = NULL;
  PatchingStub* patch = new PatchingStub(_masm, patching_id(info));
  __ movoop(reg, o);
  patching_epilog(patch, lir_patch_normal, reg, info);
}

void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) {
  Metadata* o = NULL;
  PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id);
  __ mov_metadata(reg, o);
  patching_epilog(patch, lir_patch_normal, reg, info);
}

// This specifies the rsp decrement needed to build the frame
int LIR_Assembler::initial_frame_size_in_bytes() const {
  // if rounding, must let FrameMap know!

  // The frame_map records size in slots (32bit word)

  // subtract two words to account for return address and link
  return (frame_map()->framesize() - (2*VMRegImpl::slots_per_word))  * VMRegImpl::stack_slot_size;
}


int LIR_Assembler::emit_exception_handler() {
  // if the last instruction is a call (typically to do a throw which
  // is coming at the end after block reordering) the return address
  // must still point into the code area in order to avoid assertion
  // failures when searching for the corresponding bci => add a nop
  // (was bug 5/14/1999 - gri)
  __ nop();

  // generate code for exception handler
  address handler_base = __ start_a_stub(exception_handler_size);
  if (handler_base == NULL) {
    // not enough space left for the handler
    bailout("exception handler overflow");
    return -1;
  }

  int offset = code_offset();

  // the exception oop and pc are in rax and rdx
  // no other registers need to be preserved, so invalidate them
  __ invalidate_registers(false, true, true, false, true, true);

  // check that there is really an exception
  __ verify_not_null_oop(rax);

  // search an exception handler (rax: exception oop, rdx: throwing pc)
  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
  __ should_not_reach_here();
  guarantee(code_offset() - offset <= exception_handler_size, "overflow");
  __ end_a_stub();

  return offset;
}


// Emit the code to remove the frame from the stack in the exception
// unwind path.
int LIR_Assembler::emit_unwind_handler() {
#ifndef PRODUCT
  if (CommentedAssembly) {
    _masm->block_comment("Unwind handler");
  }
#endif

  int offset = code_offset();

  // Fetch the exception from TLS and clear out exception related thread state
  Register thread = NOT_LP64(rsi) LP64_ONLY(r15_thread);
  NOT_LP64(__ get_thread(rsi));
  __ movptr(rax, Address(thread, JavaThread::exception_oop_offset()));
  __ movptr(Address(thread, JavaThread::exception_oop_offset()), (intptr_t)NULL_WORD);
  __ movptr(Address(thread, JavaThread::exception_pc_offset()), (intptr_t)NULL_WORD);

  __ bind(_unwind_handler_entry);
  __ verify_not_null_oop(rax);
  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
    __ mov(rbx, rax);  // Preserve the exception (rbx is always callee-saved)
  }

  // Perform needed unlocking
  MonitorExitStub* stub = NULL;
  if (method()->is_synchronized()) {
    monitor_address(0, FrameMap::rax_opr);
    stub = new MonitorExitStub(FrameMap::rax_opr, true, 0);
    __ unlock_object(rdi, rsi, rax, *stub->entry());
    __ bind(*stub->continuation());
  }

  if (compilation()->env()->dtrace_method_probes()) {
#ifdef _LP64
    __ mov(rdi, r15_thread);
    __ mov_metadata(rsi, method()->constant_encoding());
#else
    __ get_thread(rax);
    __ movptr(Address(rsp, 0), rax);
    __ mov_metadata(Address(rsp, sizeof(void*)), method()->constant_encoding());
#endif
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit)));
  }

  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
    __ mov(rax, rbx);  // Restore the exception
  }

  // remove the activation and dispatch to the unwind handler
  __ remove_frame(initial_frame_size_in_bytes());
  __ jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));

  // Emit the slow path assembly
  if (stub != NULL) {
    stub->emit_code(this);
  }

  return offset;
}


int LIR_Assembler::emit_deopt_handler() {
  // if the last instruction is a call (typically to do a throw which
  // is coming at the end after block reordering) the return address
  // must still point into the code area in order to avoid assertion
  // failures when searching for the corresponding bci => add a nop
  // (was bug 5/14/1999 - gri)
  __ nop();

  // generate code for deopt handler
  address handler_base = __ start_a_stub(deopt_handler_size);
  if (handler_base == NULL) {
    // not enough space left for the handler
    bailout("deopt handler overflow");
    return -1;
  }

  int offset = code_offset();
  InternalAddress here(__ pc());

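  // Push the address of this handler as though it were a return address,
  // then jump to the deopt blob; the blob treats the pushed pc as the
  // return address identifying the deoptimization point in this nmethod.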
  __ pushptr(here.addr());
  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  guarantee(code_offset() - offset <= deopt_handler_size, "overflow");
  __ end_a_stub();

  return offset;
}


// This is the fast version of java.lang.String.compare; it has no
// OSR entry, so we generate a slow version for OSRs.
void LIR_Assembler::emit_string_compare(LIR_Opr arg0, LIR_Opr arg1, LIR_Opr dst, CodeEmitInfo* info) {
  __ movptr (rbx, rcx); // receiver is in rcx
  __ movptr (rax, arg1->as_register());

  // Get addresses of first characters from both Strings
  __ load_heap_oop(rsi, Address(rax, java_lang_String::value_offset_in_bytes()));
  if (java_lang_String::has_offset_field()) {
    __ movptr     (rcx, Address(rax, java_lang_String::offset_offset_in_bytes()));
    __ movl       (rax, Address(rax, java_lang_String::count_offset_in_bytes()));
    __ lea        (rsi, Address(rsi, rcx, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
  } else {
    __ movl       (rax, Address(rsi, arrayOopDesc::length_offset_in_bytes()));
    __ lea        (rsi, Address(rsi, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
  }

  // rbx may be NULL
  add_debug_info_for_null_check_here(info);
  __ load_heap_oop(rdi, Address(rbx, java_lang_String::value_offset_in_bytes()));
  if (java_lang_String::has_offset_field()) {
    __ movptr     (rcx, Address(rbx, java_lang_String::offset_offset_in_bytes()));
    __ movl       (rbx, Address(rbx, java_lang_String::count_offset_in_bytes()));
    __ lea        (rdi, Address(rdi, rcx, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
  } else {
    __ movl       (rbx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
    __ lea        (rdi, Address(rdi, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
  }

  // compute minimum length (in rax) and difference of lengths (on top of stack)
  __ mov   (rcx, rbx);
  __ subptr(rbx, rax); // subtract lengths
  __ push  (rbx);      // result
  __ cmov  (Assembler::lessEqual, rax, rcx);

  // is minimum length 0?
  Label noLoop, haveResult;
  __ testptr (rax, rax);
  __ jcc (Assembler::zero, noLoop);

  // compare first characters
  __ load_unsigned_short(rcx, Address(rdi, 0));
  __ load_unsigned_short(rbx, Address(rsi, 0));
  __ subl(rcx, rbx);
  __ jcc(Assembler::notZero, haveResult);
  // starting loop
  __ decrement(rax); // we already tested index: skip one
  __ jcc(Assembler::zero, noLoop);

  // set rsi/rdi to the end of the arrays (arrays have same length)
  // negate the index

  __ lea(rsi, Address(rsi, rax, Address::times_2, type2aelembytes(T_CHAR)));
  __ lea(rdi, Address(rdi, rax, Address::times_2, type2aelembytes(T_CHAR)));
  __ negptr(rax);
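  // rax now counts from -min_length up to zero; indexing off the end of
  // the arrays with a negative index lets the loop advance with a single
  // increment-and-test.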

  // compare the strings in a loop

  Label loop;
  __ align(wordSize);
  __ bind(loop);
  __ load_unsigned_short(rcx, Address(rdi, rax, Address::times_2, 0));
  __ load_unsigned_short(rbx, Address(rsi, rax, Address::times_2, 0));
  __ subl(rcx, rbx);
  __ jcc(Assembler::notZero, haveResult);
  __ increment(rax);
  __ jcc(Assembler::notZero, loop);

  // strings are equal up to min length

  __ bind(noLoop);
  __ pop(rax);
  return_op(LIR_OprFact::illegalOpr);

  __ bind(haveResult);
  // leave instruction is going to discard the TOS value
  __ mov (rax, rcx); // result of call is in rax
}


void LIR_Assembler::return_op(LIR_Opr result) {
  assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == rax, "word returns are in rax");
  if (!result->is_illegal() && result->is_float_kind() && !result->is_xmm_register()) {
    assert(result->fpu() == 0, "result must already be on TOS");
  }

  // Pop the stack before the safepoint code
  __ remove_frame(initial_frame_size_in_bytes());

  bool result_is_oop = result->is_valid() ? result->is_oop() : false;

  // Note: we do not need to round double result; float result has the right precision
  // the poll sets the condition code, but no data registers
  AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type);

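  // Touch the polling page; when a safepoint is pending the page is
  // protected, so the read traps into the safepoint handler.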
  if (Assembler::is_polling_page_far()) {
    __ lea(rscratch1, polling_page);
    __ relocate(relocInfo::poll_return_type);
    __ testl(rax, Address(rscratch1, 0));
  } else {
    __ testl(rax, polling_page);
  }
  __ ret(0);
}


int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
  AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_type);
  guarantee(info != NULL, "Shouldn't be NULL");
  int offset = __ offset();
  if (Assembler::is_polling_page_far()) {
    __ lea(rscratch1, polling_page);
    offset = __ offset();
    add_debug_info_for_branch(info);
    __ relocate(relocInfo::poll_type);
    __ testl(rax, Address(rscratch1, 0));
  } else {
    add_debug_info_for_branch(info);
    __ testl(rax, polling_page);
  }
  return offset;
}


void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
  if (from_reg != to_reg) __ mov(to_reg, from_reg);
}

void LIR_Assembler::swap_reg(Register a, Register b) {
  __ xchgptr(a, b);
}


void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
  assert(src->is_constant(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");
  LIR_Const* c = src->as_constant_ptr();

  switch (c->type()) {
    case T_INT: {
      assert(patch_code == lir_patch_none, "no patching handled here");
      __ movl(dest->as_register(), c->as_jint());
      break;
    }

    case T_ADDRESS: {
      assert(patch_code == lir_patch_none, "no patching handled here");
      __ movptr(dest->as_register(), c->as_jint());
      break;
    }

    case T_LONG: {
      assert(patch_code == lir_patch_none, "no patching handled here");
#ifdef _LP64
      __ movptr(dest->as_register_lo(), (intptr_t)c->as_jlong());
#else
      __ movptr(dest->as_register_lo(), c->as_jint_lo());
      __ movptr(dest->as_register_hi(), c->as_jint_hi());
#endif // _LP64
      break;
    }

    case T_OBJECT: {
      if (patch_code != lir_patch_none) {
        jobject2reg_with_patching(dest->as_register(), info);
      } else {
        __ movoop(dest->as_register(), c->as_jobject());
      }
      break;
    }

    case T_METADATA: {
      if (patch_code != lir_patch_none) {
        klass2reg_with_patching(dest->as_register(), info);
      } else {
        __ mov_metadata(dest->as_register(), c->as_metadata());
      }
      break;
    }

    case T_FLOAT: {
      if (dest->is_single_xmm()) {
        if (c->is_zero_float()) {
          __ xorps(dest->as_xmm_float_reg(), dest->as_xmm_float_reg());
        } else {
          __ movflt(dest->as_xmm_float_reg(),
                   InternalAddress(float_constant(c->as_jfloat())));
        }
      } else {
        assert(dest->is_single_fpu(), "must be");
        assert(dest->fpu_regnr() == 0, "dest must be TOS");
        if (c->is_zero_float()) {
          __ fldz();
        } else if (c->is_one_float()) {
          __ fld1();
        } else {
          __ fld_s (InternalAddress(float_constant(c->as_jfloat())));
        }
      }
      break;
    }

    case T_DOUBLE: {
      if (dest->is_double_xmm()) {
        if (c->is_zero_double()) {
          __ xorpd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg());
        } else {
          __ movdbl(dest->as_xmm_double_reg(),
                    InternalAddress(double_constant(c->as_jdouble())));
        }
      } else {
        assert(dest->is_double_fpu(), "must be");
        assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
        if (c->is_zero_double()) {
          __ fldz();
        } else if (c->is_one_double()) {
          __ fld1();
        } else {
          __ fld_d (InternalAddress(double_constant(c->as_jdouble())));
        }
      }
      break;
    }

    default:
      ShouldNotReachHere();
  }
}

void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
  assert(src->is_constant(), "should not call otherwise");
  assert(dest->is_stack(), "should not call otherwise");
  LIR_Const* c = src->as_constant_ptr();

  switch (c->type()) {
    case T_INT:  // fall through
    case T_FLOAT:
      __ movl(frame_map()->address_for_slot(dest->single_stack_ix()), c->as_jint_bits());
      break;

    case T_ADDRESS:
      __ movptr(frame_map()->address_for_slot(dest->single_stack_ix()), c->as_jint_bits());
      break;

    case T_OBJECT:
      __ movoop(frame_map()->address_for_slot(dest->single_stack_ix()), c->as_jobject());
      break;

    case T_LONG:  // fall through
    case T_DOUBLE:
#ifdef _LP64
      __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(),
                                            lo_word_offset_in_bytes), (intptr_t)c->as_jlong_bits());
#else
      __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(),
                                              lo_word_offset_in_bytes), c->as_jint_lo_bits());
      __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(),
                                              hi_word_offset_in_bytes), c->as_jint_hi_bits());
#endif // _LP64
      break;

    default:
      ShouldNotReachHere();
  }
}

void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) {
  assert(src->is_constant(), "should not call otherwise");
  assert(dest->is_address(), "should not call otherwise");
  LIR_Const* c = src->as_constant_ptr();
  LIR_Address* addr = dest->as_address_ptr();

  int null_check_here = code_offset();
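  // null_check_here is re-recorded below wherever scratch-register setup
  // precedes the actual store, so the pc kept in the debug info is that
  // of the potentially faulting instruction.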
  switch (type) {
    case T_INT:    // fall through
    case T_FLOAT:
      __ movl(as_Address(addr), c->as_jint_bits());
      break;

    case T_ADDRESS:
      __ movptr(as_Address(addr), c->as_jint_bits());
      break;

    case T_OBJECT:  // fall through
    case T_ARRAY:
      if (c->as_jobject() == NULL) {
        if (UseCompressedOops && !wide) {
          __ movl(as_Address(addr), (int32_t)NULL_WORD);
        } else {
#ifdef _LP64
          __ xorptr(rscratch1, rscratch1);
          null_check_here = code_offset();
          __ movptr(as_Address(addr), rscratch1);
#else
          __ movptr(as_Address(addr), NULL_WORD);
#endif
        }
      } else {
        if (is_literal_address(addr)) {
          ShouldNotReachHere();
          __ movoop(as_Address(addr, noreg), c->as_jobject());
        } else {
#ifdef _LP64
          __ movoop(rscratch1, c->as_jobject());
          if (UseCompressedOops && !wide) {
            __ encode_heap_oop(rscratch1);
            null_check_here = code_offset();
            __ movl(as_Address_lo(addr), rscratch1);
          } else {
            null_check_here = code_offset();
            __ movptr(as_Address_lo(addr), rscratch1);
          }
#else
          __ movoop(as_Address(addr), c->as_jobject());
#endif
        }
      }
      break;

    case T_LONG:    // fall through
    case T_DOUBLE:
#ifdef _LP64
      if (is_literal_address(addr)) {
        ShouldNotReachHere();
        __ movptr(as_Address(addr, r15_thread), (intptr_t)c->as_jlong_bits());
      } else {
        __ movptr(r10, (intptr_t)c->as_jlong_bits());
        null_check_here = code_offset();
        __ movptr(as_Address_lo(addr), r10);
      }
#else
      // Always reachable in 32-bit, so this doesn't produce a useless move literal
      __ movptr(as_Address_hi(addr), c->as_jint_hi_bits());
      __ movptr(as_Address_lo(addr), c->as_jint_lo_bits());
#endif // _LP64
      break;

    case T_BOOLEAN: // fall through
    case T_BYTE:
      __ movb(as_Address(addr), c->as_jint() & 0xFF);
      break;

    case T_CHAR:    // fall through
    case T_SHORT:
      __ movw(as_Address(addr), c->as_jint() & 0xFFFF);
      break;

    default:
      ShouldNotReachHere();
  };

  if (info != NULL) {
    add_debug_info_for_null_check(null_check_here, info);
  }
}


void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
  assert(src->is_register(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");

  // move between cpu-registers
  if (dest->is_single_cpu()) {
#ifdef _LP64
    if (src->type() == T_LONG) {
      // Can do LONG -> OBJECT
      move_regs(src->as_register_lo(), dest->as_register());
      return;
    }
#endif
    assert(src->is_single_cpu(), "must match");
    if (src->type() == T_OBJECT) {
      __ verify_oop(src->as_register());
    }
    move_regs(src->as_register(), dest->as_register());

  } else if (dest->is_double_cpu()) {
#ifdef _LP64
    if (src->type() == T_OBJECT || src->type() == T_ARRAY) {
      // Surprising, but we do see moves of a long to T_OBJECT
      __ verify_oop(src->as_register());
      move_regs(src->as_register(), dest->as_register_lo());
      return;
    }
#endif
    assert(src->is_double_cpu(), "must match");
    Register f_lo = src->as_register_lo();
    Register f_hi = src->as_register_hi();
    Register t_lo = dest->as_register_lo();
    Register t_hi = dest->as_register_hi();
#ifdef _LP64
    assert(f_hi == f_lo, "must be same");
    assert(t_hi == t_lo, "must be same");
    move_regs(f_lo, t_lo);
#else
    assert(f_lo != f_hi && t_lo != t_hi, "invalid register allocation");


    if (f_lo == t_hi && f_hi == t_lo) {
      swap_reg(f_lo, f_hi);
    } else if (f_hi == t_lo) {
      assert(f_lo != t_hi, "overwriting register");
      move_regs(f_hi, t_hi);
      move_regs(f_lo, t_lo);
    } else {
      assert(f_hi != t_lo, "overwriting register");
      move_regs(f_lo, t_lo);
      move_regs(f_hi, t_hi);
    }
#endif // LP64

    // special moves from fpu-register to xmm-register
    // necessary for method results
  } else if (src->is_single_xmm() && !dest->is_single_xmm()) {
    __ movflt(Address(rsp, 0), src->as_xmm_float_reg());
    __ fld_s(Address(rsp, 0));
  } else if (src->is_double_xmm() && !dest->is_double_xmm()) {
    __ movdbl(Address(rsp, 0), src->as_xmm_double_reg());
    __ fld_d(Address(rsp, 0));
  } else if (dest->is_single_xmm() && !src->is_single_xmm()) {
    __ fstp_s(Address(rsp, 0));
    __ movflt(dest->as_xmm_float_reg(), Address(rsp, 0));
  } else if (dest->is_double_xmm() && !src->is_double_xmm()) {
    __ fstp_d(Address(rsp, 0));
    __ movdbl(dest->as_xmm_double_reg(), Address(rsp, 0));

    // move between xmm-registers
  } else if (dest->is_single_xmm()) {
    assert(src->is_single_xmm(), "must match");
    __ movflt(dest->as_xmm_float_reg(), src->as_xmm_float_reg());
  } else if (dest->is_double_xmm()) {
    assert(src->is_double_xmm(), "must match");
    __ movdbl(dest->as_xmm_double_reg(), src->as_xmm_double_reg());

    // move between fpu-registers (no instruction necessary because of fpu-stack)
  } else if (dest->is_single_fpu() || dest->is_double_fpu()) {
    assert(src->is_single_fpu() || src->is_double_fpu(), "must match");
    assert(src->fpu() == dest->fpu(), "currently should be nothing to do");
  } else {
    ShouldNotReachHere();
  }
}

void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
  assert(src->is_register(), "should not call otherwise");
  assert(dest->is_stack(), "should not call otherwise");

  if (src->is_single_cpu()) {
    Address dst = frame_map()->address_for_slot(dest->single_stack_ix());
    if (type == T_OBJECT || type == T_ARRAY) {
      __ verify_oop(src->as_register());
      __ movptr (dst, src->as_register());
    } else if (type == T_METADATA) {
      __ movptr (dst, src->as_register());
    } else {
      __ movl (dst, src->as_register());
    }

  } else if (src->is_double_cpu()) {
    Address dstLO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes);
    Address dstHI = frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes);
    __ movptr (dstLO, src->as_register_lo());
    NOT_LP64(__ movptr (dstHI, src->as_register_hi()));

  } else if (src->is_single_xmm()) {
    Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
    __ movflt(dst_addr, src->as_xmm_float_reg());

  } else if (src->is_double_xmm()) {
    Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
    __ movdbl(dst_addr, src->as_xmm_double_reg());

  } else if (src->is_single_fpu()) {
    assert(src->fpu_regnr() == 0, "argument must be on TOS");
    Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
    if (pop_fpu_stack)     __ fstp_s (dst_addr);
    else                   __ fst_s  (dst_addr);

  } else if (src->is_double_fpu()) {
    assert(src->fpu_regnrLo() == 0, "argument must be on TOS");
    Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
    if (pop_fpu_stack)     __ fstp_d (dst_addr);
    else                   __ fst_d  (dst_addr);

  } else {
    ShouldNotReachHere();
  }
}


void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) {
  LIR_Address* to_addr = dest->as_address_ptr();
  PatchingStub* patch = NULL;
  Register compressed_src = rscratch1;

  if (type == T_ARRAY || type == T_OBJECT) {
    __ verify_oop(src->as_register());
#ifdef _LP64
    if (UseCompressedOops && !wide) {
      __ movptr(compressed_src, src->as_register());
      __ encode_heap_oop(compressed_src);
      if (patch_code != lir_patch_none) {
        info->oop_map()->set_narrowoop(compressed_src->as_VMReg());
      }
    }
#endif
  }
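  // For a compressed-oop store, the encoded value now lives in rscratch1
  // (compressed_src); the T_OBJECT/T_ARRAY case below stores it with a
  // 32-bit move instead of a full pointer store.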

  if (patch_code != lir_patch_none) {
    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
    Address toa = as_Address(to_addr);
    assert(toa.disp() != 0, "must have");
  }

  int null_check_here = code_offset();
  switch (type) {
    case T_FLOAT: {
      if (src->is_single_xmm()) {
        __ movflt(as_Address(to_addr), src->as_xmm_float_reg());
      } else {
        assert(src->is_single_fpu(), "must be");
        assert(src->fpu_regnr() == 0, "argument must be on TOS");
        if (pop_fpu_stack)      __ fstp_s(as_Address(to_addr));
        else                    __ fst_s (as_Address(to_addr));
      }
      break;
    }

    case T_DOUBLE: {
      if (src->is_double_xmm()) {
        __ movdbl(as_Address(to_addr), src->as_xmm_double_reg());
      } else {
        assert(src->is_double_fpu(), "must be");
        assert(src->fpu_regnrLo() == 0, "argument must be on TOS");
        if (pop_fpu_stack)      __ fstp_d(as_Address(to_addr));
        else                    __ fst_d (as_Address(to_addr));
      }
      break;
    }

    case T_ARRAY:   // fall through
    case T_OBJECT:  // fall through
      if (UseCompressedOops && !wide) {
        __ movl(as_Address(to_addr), compressed_src);
      } else {
        __ movptr(as_Address(to_addr), src->as_register());
      }
      break;
    case T_METADATA:
      // We get here to store a method pointer to the stack to pass to
      // a dtrace runtime call. This can't work on 64-bit with
      // compressed klass ptrs: T_METADATA can be a compressed klass
      // ptr or a 64-bit method pointer.
      LP64_ONLY(ShouldNotReachHere());
      __ movptr(as_Address(to_addr), src->as_register());
      break;
    case T_ADDRESS:
      __ movptr(as_Address(to_addr), src->as_register());
      break;
    case T_INT:
      __ movl(as_Address(to_addr), src->as_register());
      break;

    case T_LONG: {
      Register from_lo = src->as_register_lo();
      Register from_hi = src->as_register_hi();
#ifdef _LP64
      __ movptr(as_Address_lo(to_addr), from_lo);
#else
      Register base = to_addr->base()->as_register();
      Register index = noreg;
      if (to_addr->index()->is_register()) {
        index = to_addr->index()->as_register();
      }
      if (base == from_lo || index == from_lo) {
        assert(base != from_hi, "can't be");
        assert(index == noreg || (index != base && index != from_hi), "can't handle this");
        __ movl(as_Address_hi(to_addr), from_hi);
        if (patch != NULL) {
          patching_epilog(patch, lir_patch_high, base, info);
          patch = new PatchingStub(_masm, PatchingStub::access_field_id);
          patch_code = lir_patch_low;
        }
        __ movl(as_Address_lo(to_addr), from_lo);
      } else {
        assert(index == noreg || (index != base && index != from_lo), "can't handle this");
        __ movl(as_Address_lo(to_addr), from_lo);
        if (patch != NULL) {
          patching_epilog(patch, lir_patch_low, base, info);
          patch = new PatchingStub(_masm, PatchingStub::access_field_id);
          patch_code = lir_patch_high;
        }
        __ movl(as_Address_hi(to_addr), from_hi);
      }
#endif // _LP64
      break;
    }

    case T_BYTE:    // fall through
    case T_BOOLEAN: {
      Register src_reg = src->as_register();
      Address dst_addr = as_Address(to_addr);
      assert(VM_Version::is_P6() || src_reg->has_byte_register(), "must use byte registers if not P6");
      __ movb(dst_addr, src_reg);
      break;
    }

    case T_CHAR:    // fall through
    case T_SHORT:
      __ movw(as_Address(to_addr), src->as_register());
      break;

    default:
      ShouldNotReachHere();
  }
  if (info != NULL) {
    add_debug_info_for_null_check(null_check_here, info);
  }

  if (patch_code != lir_patch_none) {
    patching_epilog(patch, patch_code, to_addr->base()->as_register(), info);
  }
}


void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
  assert(src->is_stack(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");

  if (dest->is_single_cpu()) {
    if (type == T_ARRAY || type == T_OBJECT) {
      __ movptr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
      __ verify_oop(dest->as_register());
    } else if (type == T_METADATA) {
      __ movptr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
    } else {
      __ movl(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
    }

  } else if (dest->is_double_cpu()) {
    Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes);
    Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes);
    __ movptr(dest->as_register_lo(), src_addr_LO);
    NOT_LP64(__ movptr(dest->as_register_hi(), src_addr_HI));

  } else if (dest->is_single_xmm()) {
    Address src_addr = frame_map()->address_for_slot(src->single_stack_ix());
    __ movflt(dest->as_xmm_float_reg(), src_addr);

  } else if (dest->is_double_xmm()) {
    Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
    __ movdbl(dest->as_xmm_double_reg(), src_addr);

  } else if (dest->is_single_fpu()) {
    assert(dest->fpu_regnr() == 0, "dest must be TOS");
    Address src_addr = frame_map()->address_for_slot(src->single_stack_ix());
    __ fld_s(src_addr);

  } else if (dest->is_double_fpu()) {
    assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
    Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
    __ fld_d(src_addr);

  } else {
    ShouldNotReachHere();
  }
}


void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
  if (src->is_single_stack()) {
    if (type == T_OBJECT || type == T_ARRAY) {
      __ pushptr(frame_map()->address_for_slot(src ->single_stack_ix()));
      __ popptr (frame_map()->address_for_slot(dest->single_stack_ix()));
    } else {
#ifndef _LP64
      __ pushl(frame_map()->address_for_slot(src ->single_stack_ix()));
      __ popl (frame_map()->address_for_slot(dest->single_stack_ix()));
#else
      // no pushl on 64-bit
      __ movl(rscratch1, frame_map()->address_for_slot(src ->single_stack_ix()));
      __ movl(frame_map()->address_for_slot(dest->single_stack_ix()), rscratch1);
#endif
    }

  } else if (src->is_double_stack()) {
#ifdef _LP64
    __ pushptr(frame_map()->address_for_slot(src ->double_stack_ix()));
    __ popptr (frame_map()->address_for_slot(dest->double_stack_ix()));
#else
    __ pushl(frame_map()->address_for_slot(src ->double_stack_ix(), 0));
    // push and pop the part at src + wordSize, adding wordSize for the previous push
    __ pushl(frame_map()->address_for_slot(src ->double_stack_ix(), 2 * wordSize));
    __ popl (frame_map()->address_for_slot(dest->double_stack_ix(), 2 * wordSize));
    __ popl (frame_map()->address_for_slot(dest->double_stack_ix(), 0));
#endif // _LP64

  } else {
    ShouldNotReachHere();
  }
}


void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) {
  assert(src->is_address(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");

  LIR_Address* addr = src->as_address_ptr();
  Address from_addr = as_Address(addr);

  if (addr->base()->type() == T_OBJECT) {
    __ verify_oop(addr->base()->as_pointer_register());
  }

  switch (type) {
    case T_BOOLEAN: // fall through
    case T_BYTE:    // fall through
    case T_CHAR:    // fall through
    case T_SHORT:
      if (!VM_Version::is_P6() && !from_addr.uses(dest->as_register())) {
        // on pre-P6 processors we may get partial register stalls, so
        // blow away the value of the destination register before loading
        // a partial word into it.  Do it here so that it precedes
        // the potential patch point below.
        __ xorptr(dest->as_register(), dest->as_register());
      }
      break;
  }

  PatchingStub* patch = NULL;
  if (patch_code != lir_patch_none) {
    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
    assert(from_addr.disp() != 0, "must have");
  }
  if (info != NULL) {
    add_debug_info_for_null_check_here(info);
  }

  switch (type) {
    case T_FLOAT: {
      if (dest->is_single_xmm()) {
        __ movflt(dest->as_xmm_float_reg(), from_addr);
      } else {
        assert(dest->is_single_fpu(), "must be");
        assert(dest->fpu_regnr() == 0, "dest must be TOS");
        __ fld_s(from_addr);
      }
      break;
    }

    case T_DOUBLE: {
      if (dest->is_double_xmm()) {
        __ movdbl(dest->as_xmm_double_reg(), from_addr);
      } else {
        assert(dest->is_double_fpu(), "must be");
        assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
        __ fld_d(from_addr);
      }
      break;
    }

    case T_OBJECT:  // fall through
    case T_ARRAY:   // fall through
      if (UseCompressedOops && !wide) {
        __ movl(dest->as_register(), from_addr);
      } else {
        __ movptr(dest->as_register(), from_addr);
      }
      break;

    case T_ADDRESS:
      if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) {
        __ movl(dest->as_register(), from_addr);
      } else {
        __ movptr(dest->as_register(), from_addr);
      }
      break;
    case T_INT:
      __ movl(dest->as_register(), from_addr);
      break;

    case T_LONG: {
      Register to_lo = dest->as_register_lo();
      Register to_hi = dest->as_register_hi();
#ifdef _LP64
      __ movptr(to_lo, as_Address_lo(addr));
#else
      Register base = addr->base()->as_register();
      Register index = noreg;
      if (addr->index()->is_register()) {
        index = addr->index()->as_register();
      }
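      // Loading a 64-bit value with two 32-bit loads can clobber the
      // base/index registers that form the address, so pick an order
      // (or go through lea) that keeps the address valid until both
      // halves have been loaded.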
1305       if ((base == to_lo && index == to_hi) ||
1306           (base == to_hi && index == to_lo)) {
1307         // addresses with 2 registers are only formed as a result of
1308         // array access so this code will never have to deal with
1309         // patches or null checks.
1310         assert(info == NULL && patch == NULL, "must be");
1311         __ lea(to_hi, as_Address(addr));
1312         __ movl(to_lo, Address(to_hi, 0));
1313         __ movl(to_hi, Address(to_hi, BytesPerWord));
1314       } else if (base == to_lo || index == to_lo) {
1315         assert(base != to_hi, "can't be");
1316         assert(index == noreg || (index != base && index != to_hi), "can't handle this");
1317         __ movl(to_hi, as_Address_hi(addr));
1318         if (patch != NULL) {
1319           patching_epilog(patch, lir_patch_high, base, info);
1320           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
1321           patch_code = lir_patch_low;
1322         }
1323         __ movl(to_lo, as_Address_lo(addr));
1324       } else {
1325         assert(index == noreg || (index != base && index != to_lo), "can't handle this");
1326         __ movl(to_lo, as_Address_lo(addr));
1327         if (patch != NULL) {
1328           patching_epilog(patch, lir_patch_low, base, info);
1329           patch = new PatchingStub(_masm, PatchingStub::access_field_id);
1330           patch_code = lir_patch_high;
1331         }
1332         __ movl(to_hi, as_Address_hi(addr));
1333       }
1334 #endif // _LP64
1335       break;
1336     }
1337 
1338     case T_BOOLEAN: // fall through
1339     case T_BYTE: {
1340       Register dest_reg = dest->as_register();
1341       assert(VM_Version::is_P6() || dest_reg->has_byte_register(), "must use byte registers if not P6");
1342       if (VM_Version::is_P6() || from_addr.uses(dest_reg)) {
1343         __ movsbl(dest_reg, from_addr);
1344       } else {
1345         __ movb(dest_reg, from_addr);
1346         __ shll(dest_reg, 24);
1347         __ sarl(dest_reg, 24);
1348       }
1349       break;
1350     }
1351 
1352     case T_CHAR: {
1353       Register dest_reg = dest->as_register();
1354       assert(VM_Version::is_P6() || dest_reg->has_byte_register(), "must use byte registers if not P6");
1355       if (VM_Version::is_P6() || from_addr.uses(dest_reg)) {
1356         __ movzwl(dest_reg, from_addr);
1357       } else {
1358         __ movw(dest_reg, from_addr);
1359       }
1360       break;
1361     }
1362 
1363     case T_SHORT: {
1364       Register dest_reg = dest->as_register();
1365       if (VM_Version::is_P6() || from_addr.uses(dest_reg)) {
1366         __ movswl(dest_reg, from_addr);
1367       } else {
1368         __ movw(dest_reg, from_addr);
1369         __ shll(dest_reg, 16);
1370         __ sarl(dest_reg, 16);
1371       }
1372       break;
1373     }
1374 
1375     default:
1376       ShouldNotReachHere();
1377   }
1378 
1379   if (patch != NULL) {
1380     patching_epilog(patch, patch_code, addr->base()->as_register(), info);
1381   }
1382 
1383   if (type == T_ARRAY || type == T_OBJECT) {
1384 #ifdef _LP64
1385     if (UseCompressedOops && !wide) {
1386       __ decode_heap_oop(dest->as_register());
1387     }
1388 #endif
1389     __ verify_oop(dest->as_register());
1390   } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) {
1391 #ifdef _LP64
1392     if (UseCompressedClassPointers) {
1393       __ decode_klass_not_null(dest->as_register());
1394     }
1395 #endif
1396   }
1397 }
1398 
1399 
1400 NEEDS_CLEANUP; // This could be static?
1401 Address::ScaleFactor LIR_Assembler::array_element_size(BasicType type) const {
1402   int elem_size = type2aelembytes(type);
1403   switch (elem_size) {
1404     case 1: return Address::times_1;
1405     case 2: return Address::times_2;
1406     case 4: return Address::times_4;
1407     case 8: return Address::times_8;
1408   }
1409   ShouldNotReachHere();
1410   return Address::no_scale;
1411 }
1412 
1413 
1414 void LIR_Assembler::emit_op3(LIR_Op3* op) {
1415   switch (op->code()) {
1416     case lir_idiv:
1417     case lir_irem:
1418       arithmetic_idiv(op->code(),
1419                       op->in_opr1(),
1420                       op->in_opr2(),
1421                       op->in_opr3(),
1422                       op->result_opr(),
1423                       op->info());
1424       break;
1425     default:      ShouldNotReachHere(); break;
1426   }
1427 }
1428 
1429 void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
1430 #ifdef ASSERT
1431   assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
1432   if (op->block() != NULL)  _branch_target_blocks.append(op->block());
1433   if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
1434 #endif
1435 
1436   if (op->cond() == lir_cond_always) {
1437     if (op->info() != NULL) add_debug_info_for_branch(op->info());
1438     __ jmp (*(op->label()));
1439   } else {
1440     Assembler::Condition acond = Assembler::zero;
1441     if (op->code() == lir_cond_float_branch) {
1442       assert(op->ublock() != NULL, "must have unordered successor");
1443       __ jcc(Assembler::parity, *(op->ublock()->label()));
1444       switch(op->cond()) {
1445         case lir_cond_equal:        acond = Assembler::equal;      break;
1446         case lir_cond_notEqual:     acond = Assembler::notEqual;   break;
1447         case lir_cond_less:         acond = Assembler::below;      break;
1448         case lir_cond_lessEqual:    acond = Assembler::belowEqual; break;
1449         case lir_cond_greaterEqual: acond = Assembler::aboveEqual; break;
1450         case lir_cond_greater:      acond = Assembler::above;      break;
1451         default:                         ShouldNotReachHere();
1452       }
1453     } else {
1454       switch (op->cond()) {
1455         case lir_cond_equal:        acond = Assembler::equal;       break;
1456         case lir_cond_notEqual:     acond = Assembler::notEqual;    break;
1457         case lir_cond_less:         acond = Assembler::less;        break;
1458         case lir_cond_lessEqual:    acond = Assembler::lessEqual;   break;
1459         case lir_cond_greaterEqual: acond = Assembler::greaterEqual;break;
1460         case lir_cond_greater:      acond = Assembler::greater;     break;
1461         case lir_cond_belowEqual:   acond = Assembler::belowEqual;  break;
1462         case lir_cond_aboveEqual:   acond = Assembler::aboveEqual;  break;
1463         default:                         ShouldNotReachHere();
1464       }
1465     }
1466     __ jcc(acond,*(op->label()));
1467   }
1468 }
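// Note on the float branch above: ucomiss/ucomisd set ZF/PF/CF like an
// unsigned compare and set the parity flag for unordered operands (NaN).
// Hence the jcc(parity, ...) routes NaN cases to the unordered successor
// first, and lir_cond_less maps to Assembler::below (CF) rather than
// Assembler::less (SF/OF) on this path. The compiler picks the ublock so
// that fcmpl/fcmpg bytecodes get their required NaN behavior.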
1469 
1470 void LIR_Assembler::emit_opShenandoahWriteBarrier(LIR_OpShenandoahWriteBarrier* op) {
1471   Label done;
1472   Register obj = op->in_opr()->as_register();
1473   Register res = op->result_opr()->as_register();
1474   Register tmp1 = op->tmp1_opr()->as_register();
1475   Register tmp2 = op->tmp2_opr()->as_register();
1476   assert_different_registers(res, tmp1, tmp2);
1477 
1478   if (res != obj) {
1479     __ mov(res, obj);
1480   }
1481 
1482   // Check for null.
1483   if (op->need_null_check()) {
1484     __ testptr(res, res);
1485     __ jcc(Assembler::zero, done);
1486   }
1487 
1488   // Check for evacuation-in-progress
1489   Address evacuation_in_progress = Address(r15_thread, in_bytes(JavaThread::evacuation_in_progress_offset()));
1490   __ cmpb(evacuation_in_progress, 0);
1491 
  // The read barrier: load the forwardee through the Brooks pointer stored in
  // the word just before the object. movptr does not touch the flags, so the
  // result of the cmpb above is still live for the jcc below.
  __ movptr(res, Address(res, -8));

  // Not evacuating: the read-barrier result is final.
  __ jcc(Assembler::equal, done);
1496 
1497   // Check for object in collection set.
1498   __ movptr(tmp1, res);
1499   __ shrptr(tmp1, ShenandoahHeapRegion::RegionSizeShift);
1500   __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
1501   __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
1502   __ testb(tmp2, 0x1);
1503   __ jcc(Assembler::zero, done);
1504 
1505   if (res != rax) {
1506     __ xchgptr(res, rax); // Move obj into rax and save rax into obj.
1507   }
1508 
1509   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::shenandoah_write_barrier_slow_id)));
1510 
1511   if (res != rax) {
1512     __ xchgptr(rax, res); // Swap back obj with rax.
1513   }
1514 
1515   __ bind(done);
1516 
1517 }
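// Roughly, the write barrier above performs the following (a sketch in C-like
// pseudocode; the names are descriptive, not actual runtime entry points):
//
//   oop shenandoah_write_barrier(oop obj) {
//     if (obj == NULL) return NULL;             // optional null check
//     oop fwd = *(oop*)((char*)obj - 8);        // Brooks-pointer read barrier
//     if (!evacuation_in_progress) return fwd;
//     if (!in_collection_set(fwd)) return fwd;
//     return slow_path_evacuate(fwd);           // runtime copies the object
//   }
//
// The in-cset test is one byte per heap region indexed by
// (addr >> RegionSizeShift), so membership costs two loads and a bit test.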
1518 
1519 void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
1520   LIR_Opr src  = op->in_opr();
1521   LIR_Opr dest = op->result_opr();
1522 
1523   switch (op->bytecode()) {
1524     case Bytecodes::_i2l:
1525 #ifdef _LP64
1526       __ movl2ptr(dest->as_register_lo(), src->as_register());
1527 #else
1528       move_regs(src->as_register(), dest->as_register_lo());
1529       move_regs(src->as_register(), dest->as_register_hi());
1530       __ sarl(dest->as_register_hi(), 31);
1531 #endif // LP64
1532       break;
1533 
1534     case Bytecodes::_l2i:
1535 #ifdef _LP64
1536       __ movl(dest->as_register(), src->as_register_lo());
1537 #else
1538       move_regs(src->as_register_lo(), dest->as_register());
1539 #endif
1540       break;
1541 
1542     case Bytecodes::_i2b:
1543       move_regs(src->as_register(), dest->as_register());
1544       __ sign_extend_byte(dest->as_register());
1545       break;
1546 
1547     case Bytecodes::_i2c:
1548       move_regs(src->as_register(), dest->as_register());
1549       __ andl(dest->as_register(), 0xFFFF);
1550       break;
1551 
1552     case Bytecodes::_i2s:
1553       move_regs(src->as_register(), dest->as_register());
1554       __ sign_extend_short(dest->as_register());
1555       break;
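
    // The three narrowing conversions above implement JLS int narrowing: i2b
    // and i2s sign-extend from bits 7 and 15, while i2c zero-extends because
    // Java's char is an unsigned 16-bit type. For src = 0x00012345:
    //   i2b -> 0x00000045 (69), i2c -> 0x00002345, i2s -> 0x00002345;
    // and for src = 0x0001ABCD:
    //   i2b -> 0xFFFFFFCD (-51), i2c -> 0x0000ABCD, i2s -> 0xFFFFABCD (-21555).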
1556 
1557 
1558     case Bytecodes::_f2d:
1559     case Bytecodes::_d2f:
1560       if (dest->is_single_xmm()) {
1561         __ cvtsd2ss(dest->as_xmm_float_reg(), src->as_xmm_double_reg());
1562       } else if (dest->is_double_xmm()) {
1563         __ cvtss2sd(dest->as_xmm_double_reg(), src->as_xmm_float_reg());
1564       } else {
1565         assert(src->fpu() == dest->fpu(), "register must be equal");
1566         // do nothing (float result is rounded later through spilling)
1567       }
1568       break;
1569 
1570     case Bytecodes::_i2f:
1571     case Bytecodes::_i2d:
1572       if (dest->is_single_xmm()) {
1573         __ cvtsi2ssl(dest->as_xmm_float_reg(), src->as_register());
1574       } else if (dest->is_double_xmm()) {
1575         __ cvtsi2sdl(dest->as_xmm_double_reg(), src->as_register());
1576       } else {
1577         assert(dest->fpu() == 0, "result must be on TOS");
1578         __ movl(Address(rsp, 0), src->as_register());
1579         __ fild_s(Address(rsp, 0));
1580       }
1581       break;
1582 
1583     case Bytecodes::_f2i:
1584     case Bytecodes::_d2i:
1585       if (src->is_single_xmm()) {
1586         __ cvttss2sil(dest->as_register(), src->as_xmm_float_reg());
1587       } else if (src->is_double_xmm()) {
1588         __ cvttsd2sil(dest->as_register(), src->as_xmm_double_reg());
1589       } else {
1590         assert(src->fpu() == 0, "input must be on TOS");
1591         __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
1592         __ fist_s(Address(rsp, 0));
1593         __ movl(dest->as_register(), Address(rsp, 0));
1594         __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1595       }
1596 
1597       // IA32 conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub
1598       assert(op->stub() != NULL, "stub required");
1599       __ cmpl(dest->as_register(), 0x80000000);
1600       __ jcc(Assembler::equal, *op->stub()->entry());
1601       __ bind(*op->stub()->continuation());
1602       break;
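
    // cvttss2si/cvttsd2si return the "integer indefinite" value 0x80000000
    // for NaN and for results outside the int range, which is why that exact
    // bit pattern triggers the fixup stub. The stub produces the JLS answers,
    // e.g. (int)Float.NaN == 0, (int)1e30f == Integer.MAX_VALUE and
    // (int)-1e30f == Integer.MIN_VALUE. A genuine result of Integer.MIN_VALUE
    // also takes the stub, which simply confirms it.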
1603 
1604     case Bytecodes::_l2f:
1605     case Bytecodes::_l2d:
1606       assert(!dest->is_xmm_register(), "result in xmm register not supported (no SSE instruction present)");
1607       assert(dest->fpu() == 0, "result must be on TOS");
1608 
1609       __ movptr(Address(rsp, 0),            src->as_register_lo());
1610       NOT_LP64(__ movl(Address(rsp, BytesPerWord), src->as_register_hi()));
1611       __ fild_d(Address(rsp, 0));
1612       // float result is rounded later through spilling
1613       break;
1614 
1615     case Bytecodes::_f2l:
1616     case Bytecodes::_d2l:
1617       assert(!src->is_xmm_register(), "input in xmm register not supported (no SSE instruction present)");
1618       assert(src->fpu() == 0, "input must be on TOS");
1619       assert(dest == FrameMap::long0_opr, "runtime stub places result in these registers");
1620 
1621       // instruction sequence too long to inline it here
1622       {
1623         __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::fpu2long_stub_id)));
1624       }
1625       break;
1626 
1627     default: ShouldNotReachHere();
1628   }
1629 }
1630 
1631 void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
1632   if (op->init_check()) {
1633     __ cmpb(Address(op->klass()->as_register(),
1634                     InstanceKlass::init_state_offset()),
1635                     InstanceKlass::fully_initialized);
1636     add_debug_info_for_null_check_here(op->stub()->info());
1637     __ jcc(Assembler::notEqual, *op->stub()->entry());
1638   }
1639   __ allocate_object(op->obj()->as_register(),
1640                      op->tmp1()->as_register(),
1641                      op->tmp2()->as_register(),
1642                      op->header_size(),
1643                      op->object_size(),
1644                      op->klass()->as_register(),
1645                      *op->stub()->entry());
1646   __ bind(*op->stub()->continuation());
1647 }
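// The init-state check above forces allocation through the slow path until the
// klass is fully initialized, which is how "new C()" triggers or waits for C's
// <clinit>. The fast path emitted by allocate_object is the usual bump-pointer
// (TLAB/eden) allocation plus header and body initialization, branching to the
// stub entry when it cannot allocate inline.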
1648 
1649 void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
1650   Register len =  op->len()->as_register();
1651   LP64_ONLY( __ movslq(len, len); )
1652 
1653   if (UseSlowPath ||
1654       (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) ||
1655       (!UseFastNewTypeArray   && (op->type() != T_OBJECT && op->type() != T_ARRAY))) {
1656     __ jmp(*op->stub()->entry());
1657   } else {
1658     Register tmp1 = op->tmp1()->as_register();
1659     Register tmp2 = op->tmp2()->as_register();
1660     Register tmp3 = op->tmp3()->as_register();
1661     if (len == tmp1) {
1662       tmp1 = tmp3;
1663     } else if (len == tmp2) {
1664       tmp2 = tmp3;
1665     } else if (len == tmp3) {
      // len is already in tmp3: nothing to do
1667     } else {
1668       __ mov(tmp3, len);
1669     }
1670     __ allocate_array(op->obj()->as_register(),
1671                       len,
1672                       tmp1,
1673                       tmp2,
1674                       arrayOopDesc::header_size(op->type()),
1675                       array_element_size(op->type()),
1676                       op->klass()->as_register(),
1677                       *op->stub()->entry());
1678   }
1679   __ bind(*op->stub()->continuation());
1680 }
1681 
1682 void LIR_Assembler::type_profile_helper(Register mdo,
1683                                         ciMethodData *md, ciProfileData *data,
1684                                         Register recv, Label* update_done) {
1685   for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
1686     Label next_test;
1687     // See if the receiver is receiver[n].
1688     __ cmpptr(recv, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
1689     __ jccb(Assembler::notEqual, next_test);
1690     Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
1691     __ addptr(data_addr, DataLayout::counter_increment);
1692     __ jmp(*update_done);
1693     __ bind(next_test);
1694   }
1695 
1696   // Didn't find receiver; find next empty slot and fill it in
1697   for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
1698     Label next_test;
1699     Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)));
1700     __ cmpptr(recv_addr, (intptr_t)NULL_WORD);
1701     __ jccb(Assembler::notEqual, next_test);
1702     __ movptr(recv_addr, recv);
1703     __ movptr(Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))), DataLayout::counter_increment);
1704     __ jmp(*update_done);
1705     __ bind(next_test);
1706   }
1707 }
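// The helper above implements a small direct-mapped receiver cache. In C-like
// pseudocode (a sketch of the emitted logic, not a runtime function):
//
//   for (i = 0; i < row_limit; i++)        // pass 1: known receivers
//     if (rows[i].receiver == recv) { rows[i].count += increment; goto done; }
//   for (i = 0; i < row_limit; i++)        // pass 2: claim an empty row
//     if (rows[i].receiver == NULL) {
//       rows[i].receiver = recv; rows[i].count = increment; goto done;
//     }
//   // all rows hold other types: this receiver simply goes unrecorded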
1708 
1709 void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
1710   // we always need a stub for the failure case.
1711   CodeStub* stub = op->stub();
1712   Register obj = op->object()->as_register();
1713   Register k_RInfo = op->tmp1()->as_register();
1714   Register klass_RInfo = op->tmp2()->as_register();
1715   Register dst = op->result_opr()->as_register();
1716   ciKlass* k = op->klass();
1717   Register Rtmp1 = noreg;
1718 
1719   // check if it needs to be profiled
1720   ciMethodData* md;
1721   ciProfileData* data;
1722 
1723   if (op->should_profile()) {
1724     ciMethod* method = op->profiled_method();
1725     assert(method != NULL, "Should have method");
1726     int bci = op->profiled_bci();
1727     md = method->method_data_or_null();
1728     assert(md != NULL, "Sanity");
1729     data = md->bci_to_data(bci);
1730     assert(data != NULL,                "need data for type check");
1731     assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
1732   }
1733   Label profile_cast_success, profile_cast_failure;
1734   Label *success_target = op->should_profile() ? &profile_cast_success : success;
1735   Label *failure_target = op->should_profile() ? &profile_cast_failure : failure;
1736 
1737   if (obj == k_RInfo) {
1738     k_RInfo = dst;
1739   } else if (obj == klass_RInfo) {
1740     klass_RInfo = dst;
1741   }
1742   if (k->is_loaded() && !UseCompressedClassPointers) {
1743     select_different_registers(obj, dst, k_RInfo, klass_RInfo);
1744   } else {
1745     Rtmp1 = op->tmp3()->as_register();
1746     select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
1747   }
1748 
1749   assert_different_registers(obj, k_RInfo, klass_RInfo);
1750 
1751   __ cmpptr(obj, (int32_t)NULL_WORD);
1752   if (op->should_profile()) {
1753     Label not_null;
1754     __ jccb(Assembler::notEqual, not_null);
1755     // Object is null; update MDO and exit
1756     Register mdo  = klass_RInfo;
1757     __ mov_metadata(mdo, md->constant_encoding());
1758     Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
1759     int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
1760     __ orl(data_addr, header_bits);
1761     __ jmp(*obj_is_null);
1762     __ bind(not_null);
1763   } else {
1764     __ jcc(Assembler::equal, *obj_is_null);
1765   }
1766 
1767   if (!k->is_loaded()) {
1768     klass2reg_with_patching(k_RInfo, op->info_for_patch());
1769   } else {
1770 #ifdef _LP64
1771     __ mov_metadata(k_RInfo, k->constant_encoding());
1772 #endif // _LP64
1773   }
1774   __ verify_oop(obj);
1775 
1776   if (op->fast_check()) {
1777     // get object class
1778     // not a safepoint as obj null check happens earlier
1779 #ifdef _LP64
1780     if (UseCompressedClassPointers) {
1781       __ load_klass(Rtmp1, obj);
1782       __ cmpptr(k_RInfo, Rtmp1);
1783     } else {
1784       __ cmpptr(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
1785     }
1786 #else
1787     if (k->is_loaded()) {
1788       __ cmpklass(Address(obj, oopDesc::klass_offset_in_bytes()), k->constant_encoding());
1789     } else {
1790       __ cmpptr(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
1791     }
1792 #endif
1793     __ jcc(Assembler::notEqual, *failure_target);
1794     // successful cast, fall through to profile or jump
1795   } else {
1796     // get object class
1797     // not a safepoint as obj null check happens earlier
1798     __ load_klass(klass_RInfo, obj);
1799     if (k->is_loaded()) {
1800       // See if we get an immediate positive hit
1801 #ifdef _LP64
1802       __ cmpptr(k_RInfo, Address(klass_RInfo, k->super_check_offset()));
1803 #else
1804       __ cmpklass(Address(klass_RInfo, k->super_check_offset()), k->constant_encoding());
1805 #endif // _LP64
1806       if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) {
1807         __ jcc(Assembler::notEqual, *failure_target);
1808         // successful cast, fall through to profile or jump
1809       } else {
1810         // See if we get an immediate positive hit
1811         __ jcc(Assembler::equal, *success_target);
1812         // check for self
1813 #ifdef _LP64
1814         __ cmpptr(klass_RInfo, k_RInfo);
1815 #else
1816         __ cmpklass(klass_RInfo, k->constant_encoding());
1817 #endif // _LP64
1818         __ jcc(Assembler::equal, *success_target);
1819 
1820         __ push(klass_RInfo);
1821 #ifdef _LP64
1822         __ push(k_RInfo);
1823 #else
1824         __ pushklass(k->constant_encoding());
1825 #endif // _LP64
1826         __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
1827         __ pop(klass_RInfo);
1828         __ pop(klass_RInfo);
1829         // result is a boolean
1830         __ cmpl(klass_RInfo, 0);
1831         __ jcc(Assembler::equal, *failure_target);
1832         // successful cast, fall through to profile or jump
1833       }
1834     } else {
1835       // perform the fast part of the checking logic
1836       __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
1837       // call out-of-line instance of __ check_klass_subtype_slow_path(...):
1838       __ push(klass_RInfo);
1839       __ push(k_RInfo);
1840       __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
1841       __ pop(klass_RInfo);
1842       __ pop(k_RInfo);
1843       // result is a boolean
1844       __ cmpl(k_RInfo, 0);
1845       __ jcc(Assembler::equal, *failure_target);
1846       // successful cast, fall through to profile or jump
1847     }
1848   }
1849   if (op->should_profile()) {
1850     Register mdo  = klass_RInfo, recv = k_RInfo;
1851     __ bind(profile_cast_success);
1852     __ mov_metadata(mdo, md->constant_encoding());
1853     __ load_klass(recv, obj);
1854     Label update_done;
1855     type_profile_helper(mdo, md, data, recv, success);
1856     __ jmp(*success);
1857 
1858     __ bind(profile_cast_failure);
1859     __ mov_metadata(mdo, md->constant_encoding());
1860     Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
1861     __ subptr(counter_addr, DataLayout::counter_increment);
1862     __ jmp(*failure);
1863   }
1864   __ jmp(*success);
1865 }
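// Overview of the subtype check emitted above: every Klass stores, at its
// super_check_offset, either a slot in the display of primary supertypes or
// the secondary-super cache. If the statically known class k has a primary
// super_check_offset, one load plus one compare against obj's klass decides
// the cast. Only when k's offset aliases secondary_super_cache_offset (e.g.
// k is an interface or a deep superclass) do we need the self check and the
// out-of-line slow_subtype_check stub, which scans the secondary-supers list
// and returns its boolean result through the stack.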
1866 
1867 
1868 void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
1869   LIR_Code code = op->code();
1870   if (code == lir_store_check) {
1871     Register value = op->object()->as_register();
1872     Register array = op->array()->as_register();
1873     Register k_RInfo = op->tmp1()->as_register();
1874     Register klass_RInfo = op->tmp2()->as_register();
1875     Register Rtmp1 = op->tmp3()->as_register();
1876 
1877     CodeStub* stub = op->stub();
1878 
1879     // check if it needs to be profiled
1880     ciMethodData* md;
1881     ciProfileData* data;
1882 
1883     if (op->should_profile()) {
1884       ciMethod* method = op->profiled_method();
1885       assert(method != NULL, "Should have method");
1886       int bci = op->profiled_bci();
1887       md = method->method_data_or_null();
1888       assert(md != NULL, "Sanity");
1889       data = md->bci_to_data(bci);
1890       assert(data != NULL,                "need data for type check");
1891       assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
1892     }
1893     Label profile_cast_success, profile_cast_failure, done;
1894     Label *success_target = op->should_profile() ? &profile_cast_success : &done;
1895     Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
1896 
1897     __ cmpptr(value, (int32_t)NULL_WORD);
1898     if (op->should_profile()) {
1899       Label not_null;
1900       __ jccb(Assembler::notEqual, not_null);
1901       // Object is null; update MDO and exit
1902       Register mdo  = klass_RInfo;
1903       __ mov_metadata(mdo, md->constant_encoding());
1904       Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
1905       int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
1906       __ orl(data_addr, header_bits);
1907       __ jmp(done);
1908       __ bind(not_null);
1909     } else {
1910       __ jcc(Assembler::equal, done);
1911     }
1912 
1913     add_debug_info_for_null_check_here(op->info_for_exception());
1914     __ load_klass(k_RInfo, array);
1915     __ load_klass(klass_RInfo, value);
1916 
1917     // get instance klass (it's already uncompressed)
1918     __ movptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset()));
1919     // perform the fast part of the checking logic
1920     __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
1921     // call out-of-line instance of __ check_klass_subtype_slow_path(...):
1922     __ push(klass_RInfo);
1923     __ push(k_RInfo);
1924     __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
1925     __ pop(klass_RInfo);
1926     __ pop(k_RInfo);
1927     // result is a boolean
1928     __ cmpl(k_RInfo, 0);
1929     __ jcc(Assembler::equal, *failure_target);
1930     // fall through to the success case
1931 
1932     if (op->should_profile()) {
1933       Register mdo  = klass_RInfo, recv = k_RInfo;
1934       __ bind(profile_cast_success);
1935       __ mov_metadata(mdo, md->constant_encoding());
1936       __ load_klass(recv, value);
1937       Label update_done;
1938       type_profile_helper(mdo, md, data, recv, &done);
1939       __ jmpb(done);
1940 
1941       __ bind(profile_cast_failure);
1942       __ mov_metadata(mdo, md->constant_encoding());
1943       Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
1944       __ subptr(counter_addr, DataLayout::counter_increment);
1945       __ jmp(*stub->entry());
1946     }
1947 
1948     __ bind(done);
  } else if (code == lir_checkcast) {
    Register obj = op->object()->as_register();
    Register dst = op->result_opr()->as_register();
    Label success;
    emit_typecheck_helper(op, &success, op->stub()->entry(), &success);
    __ bind(success);
    if (dst != obj) {
      __ mov(dst, obj);
    }
  } else if (code == lir_instanceof) {
    Register obj = op->object()->as_register();
    Register dst = op->result_opr()->as_register();
    Label success, failure, done;
    emit_typecheck_helper(op, &success, &failure, &failure);
    __ bind(failure);
    __ xorptr(dst, dst);
    __ jmpb(done);
    __ bind(success);
    __ movptr(dst, 1);
    __ bind(done);
  } else {
    ShouldNotReachHere();
  }
}
1976 
1977 
1978 void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
1979   if (LP64_ONLY(false &&) op->code() == lir_cas_long && VM_Version::supports_cx8()) {
1980     assert(op->cmp_value()->as_register_lo() == rax, "wrong register");
1981     assert(op->cmp_value()->as_register_hi() == rdx, "wrong register");
1982     assert(op->new_value()->as_register_lo() == rbx, "wrong register");
1983     assert(op->new_value()->as_register_hi() == rcx, "wrong register");
1984     Register addr = op->addr()->as_register();
1985     if (os::is_MP()) {
1986       __ lock();
1987     }
1988     NOT_LP64(__ cmpxchg8(Address(addr, 0)));
1989 
1990   } else if (op->code() == lir_cas_int || op->code() == lir_cas_obj ) {
1991     NOT_LP64(assert(op->addr()->is_single_cpu(), "must be single");)
1992     Register addr = (op->addr()->is_single_cpu() ? op->addr()->as_register() : op->addr()->as_register_lo());
1993     Register newval = op->new_value()->as_register();
1994     Register cmpval = op->cmp_value()->as_register();
1995     assert(cmpval == rax, "wrong register");
1996     assert(newval != NULL, "new val must be register");
1997     assert(cmpval != newval, "cmp and new values must be in different registers");
1998     assert(cmpval != addr, "cmp and addr must be in different registers");
1999     assert(newval != addr, "new value and addr must be in different registers");
2000 
    if (op->code() == lir_cas_obj) {
2002 #ifdef _LP64
2003       if (UseCompressedOops) {
2004         __ encode_heap_oop(cmpval);
2005         __ mov(rscratch1, newval);
2006         __ encode_heap_oop(rscratch1);
2007         if (os::is_MP()) {
2008           __ lock();
2009         }
2010         // cmpval (rax) is implicitly used by this instruction
2011         __ cmpxchgl(rscratch1, Address(addr, 0));
2012       } else
2013 #endif
2014       {
2015         if (UseShenandoahGC) {
2016           Label done;
2017           Label retry;
2018 
2019           __ bind(retry);
2020 
          // Save the original cmp-value into tmp1 before the cmpxchg below
          // destroys it (rax is the implicit compare operand and receives the
          // old memory value on failure).
2022           __ movptr(op->tmp1()->as_register(), op->cmp_value()->as_register());
2023 
2024           if (os::is_MP()) {
2025             __ lock();
2026           }
2027           __ cmpxchgptr(newval, Address(addr, 0));
2028 
2029           // If the cmpxchg succeeded, then we're done.
2030           __ jcc(Assembler::equal, done);
2031 
2032           // Resolve the original cmp value.
2033           oopDesc::bs()->interpreter_read_barrier(masm(), op->tmp1()->as_register());
2034           // Resolve the old value at address. We get the old value in cmp/rax
2035           // when the comparison in cmpxchg failed.
2036           __ movptr(op->tmp2()->as_register(), cmpval);
2037           oopDesc::bs()->interpreter_read_barrier(masm(), op->tmp2()->as_register());
2038 
          // If the resolved expected value differs from the resolved old value,
          // this is a genuine cmpxchg failure and we are done. If both resolve to
          // the same object, the failure came from from-space/to-space aliasing of
          // the two copies, so retry (rax now holds the value actually in memory).
2042           __ cmpptr(op->tmp1()->as_register(), op->tmp2()->as_register());
2043           __ jcc(Assembler::equal, retry);
2044 
2045           __ bind(done);
2046         } else {
2047           if (os::is_MP()) {
2048             __ lock();
2049           }
2050           __ cmpxchgptr(newval, Address(addr, 0));
2051         }
2052       }
2053     } else {
2054       assert(op->code() == lir_cas_int, "lir_cas_int expected");
2055       if (os::is_MP()) {
2056         __ lock();
2057       }
2058       __ cmpxchgl(newval, Address(addr, 0));
2059     }
2060 #ifdef _LP64
2061   } else if (op->code() == lir_cas_long) {
2062     Register addr = (op->addr()->is_single_cpu() ? op->addr()->as_register() : op->addr()->as_register_lo());
2063     Register newval = op->new_value()->as_register_lo();
2064     Register cmpval = op->cmp_value()->as_register_lo();
2065     assert(cmpval == rax, "wrong register");
2066     assert(newval != NULL, "new val must be register");
2067     assert(cmpval != newval, "cmp and new values must be in different registers");
2068     assert(cmpval != addr, "cmp and addr must be in different registers");
2069     assert(newval != addr, "new value and addr must be in different registers");
2070     if (os::is_MP()) {
2071       __ lock();
2072     }
2073     __ cmpxchgq(newval, Address(addr, 0));
2074 #endif // _LP64
2075   } else {
2076     Unimplemented();
2077   }
2078 }
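// The Shenandoah retry loop above guards against false CAS failures, where the
// field holds the from-space copy of an object while cmpval is the to-space
// copy (or vice versa). A sketch of the emitted logic:
//
//   do {
//     saved = cmpval;
//     old = CAS(addr, cmpval, newval);   // a failed CAS leaves old in rax,
//     if (succeeded) break;              // which becomes the next cmpval
//   } while (resolve(saved) == resolve(old));
//
// where resolve() is the Brooks-pointer read barrier. If the resolved values
// differ, the failure is genuine and is reported to the caller as usual.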
2079 
2080 void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
2081   Assembler::Condition acond, ncond;
2082   switch (condition) {
2083     case lir_cond_equal:        acond = Assembler::equal;        ncond = Assembler::notEqual;     break;
2084     case lir_cond_notEqual:     acond = Assembler::notEqual;     ncond = Assembler::equal;        break;
2085     case lir_cond_less:         acond = Assembler::less;         ncond = Assembler::greaterEqual; break;
2086     case lir_cond_lessEqual:    acond = Assembler::lessEqual;    ncond = Assembler::greater;      break;
2087     case lir_cond_greaterEqual: acond = Assembler::greaterEqual; ncond = Assembler::less;         break;
2088     case lir_cond_greater:      acond = Assembler::greater;      ncond = Assembler::lessEqual;    break;
2089     case lir_cond_belowEqual:   acond = Assembler::belowEqual;   ncond = Assembler::above;        break;
2090     case lir_cond_aboveEqual:   acond = Assembler::aboveEqual;   ncond = Assembler::below;        break;
2091     default:                    ShouldNotReachHere();
2092   }
2093 
2094   if (opr1->is_cpu_register()) {
2095     reg2reg(opr1, result);
2096   } else if (opr1->is_stack()) {
2097     stack2reg(opr1, result, result->type());
2098   } else if (opr1->is_constant()) {
2099     const2reg(opr1, result, lir_patch_none, NULL);
2100   } else {
2101     ShouldNotReachHere();
2102   }
2103 
2104   if (VM_Version::supports_cmov() && !opr2->is_constant()) {
2105     // optimized version that does not require a branch
2106     if (opr2->is_single_cpu()) {
2107       assert(opr2->cpu_regnr() != result->cpu_regnr(), "opr2 already overwritten by previous move");
2108       __ cmov(ncond, result->as_register(), opr2->as_register());
2109     } else if (opr2->is_double_cpu()) {
2110       assert(opr2->cpu_regnrLo() != result->cpu_regnrLo() && opr2->cpu_regnrLo() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
2111       assert(opr2->cpu_regnrHi() != result->cpu_regnrLo() && opr2->cpu_regnrHi() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
2112       __ cmovptr(ncond, result->as_register_lo(), opr2->as_register_lo());
2113       NOT_LP64(__ cmovptr(ncond, result->as_register_hi(), opr2->as_register_hi());)
2114     } else if (opr2->is_single_stack()) {
2115       __ cmovl(ncond, result->as_register(), frame_map()->address_for_slot(opr2->single_stack_ix()));
2116     } else if (opr2->is_double_stack()) {
2117       __ cmovptr(ncond, result->as_register_lo(), frame_map()->address_for_slot(opr2->double_stack_ix(), lo_word_offset_in_bytes));
2118       NOT_LP64(__ cmovptr(ncond, result->as_register_hi(), frame_map()->address_for_slot(opr2->double_stack_ix(), hi_word_offset_in_bytes));)
2119     } else {
2120       ShouldNotReachHere();
2121     }
2122 
2123   } else {
2124     Label skip;
2125     __ jcc (acond, skip);
2126     if (opr2->is_cpu_register()) {
2127       reg2reg(opr2, result);
2128     } else if (opr2->is_stack()) {
2129       stack2reg(opr2, result, result->type());
2130     } else if (opr2->is_constant()) {
2131       const2reg(opr2, result, lir_patch_none, NULL);
2132     } else {
2133       ShouldNotReachHere();
2134     }
2135     __ bind(skip);
2136   }
2137 }
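// Example of the branchless path: for "r = (a < b) ? p : q" the emitted
// sequence is roughly
//   cmp    a, b        ; flags set earlier by comp_op
//   mov    r, p        ; unconditional move of opr1
//   cmovge r, q        ; overwrite with opr2 on the negated condition
// This is why ncond (the negation of the requested condition) is used, and why
// the asserts require that opr2 not alias the result register: the move of
// opr1 would clobber it before the cmov reads it. Constants take the branch
// path because cmov has no immediate-operand form.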
2138 
2139 
2140 void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) {
2141   assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method");
2142 
2143   if (left->is_single_cpu()) {
2144     assert(left == dest, "left and dest must be equal");
2145     Register lreg = left->as_register();
2146 
2147     if (right->is_single_cpu()) {
2148       // cpu register - cpu register
2149       Register rreg = right->as_register();
2150       switch (code) {
2151         case lir_add: __ addl (lreg, rreg); break;
2152         case lir_sub: __ subl (lreg, rreg); break;
2153         case lir_mul: __ imull(lreg, rreg); break;
2154         default:      ShouldNotReachHere();
2155       }
2156 
2157     } else if (right->is_stack()) {
2158       // cpu register - stack
2159       Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
2160       switch (code) {
2161         case lir_add: __ addl(lreg, raddr); break;
2162         case lir_sub: __ subl(lreg, raddr); break;
2163         default:      ShouldNotReachHere();
2164       }
2165 
2166     } else if (right->is_constant()) {
2167       // cpu register - constant
2168       jint c = right->as_constant_ptr()->as_jint();
2169       switch (code) {
2170         case lir_add: {
2171           __ incrementl(lreg, c);
2172           break;
2173         }
2174         case lir_sub: {
2175           __ decrementl(lreg, c);
2176           break;
2177         }
2178         default: ShouldNotReachHere();
2179       }
2180 
2181     } else {
2182       ShouldNotReachHere();
2183     }
2184 
2185   } else if (left->is_double_cpu()) {
2186     assert(left == dest, "left and dest must be equal");
2187     Register lreg_lo = left->as_register_lo();
2188     Register lreg_hi = left->as_register_hi();
2189 
2190     if (right->is_double_cpu()) {
2191       // cpu register - cpu register
2192       Register rreg_lo = right->as_register_lo();
2193       Register rreg_hi = right->as_register_hi();
2194       NOT_LP64(assert_different_registers(lreg_lo, lreg_hi, rreg_lo, rreg_hi));
2195       LP64_ONLY(assert_different_registers(lreg_lo, rreg_lo));
2196       switch (code) {
2197         case lir_add:
2198           __ addptr(lreg_lo, rreg_lo);
2199           NOT_LP64(__ adcl(lreg_hi, rreg_hi));
2200           break;
2201         case lir_sub:
2202           __ subptr(lreg_lo, rreg_lo);
2203           NOT_LP64(__ sbbl(lreg_hi, rreg_hi));
2204           break;
2205         case lir_mul:
2206 #ifdef _LP64
2207           __ imulq(lreg_lo, rreg_lo);
2208 #else
2209           assert(lreg_lo == rax && lreg_hi == rdx, "must be");
2210           __ imull(lreg_hi, rreg_lo);
2211           __ imull(rreg_hi, lreg_lo);
2212           __ addl (rreg_hi, lreg_hi);
2213           __ mull (rreg_lo);
2214           __ addl (lreg_hi, rreg_hi);
2215 #endif // _LP64
2216           break;
2217         default:
2218           ShouldNotReachHere();
2219       }
2220 
2221     } else if (right->is_constant()) {
2222       // cpu register - constant
2223 #ifdef _LP64
2224       jlong c = right->as_constant_ptr()->as_jlong_bits();
2225       __ movptr(r10, (intptr_t) c);
2226       switch (code) {
2227         case lir_add:
2228           __ addptr(lreg_lo, r10);
2229           break;
2230         case lir_sub:
2231           __ subptr(lreg_lo, r10);
2232           break;
2233         default:
2234           ShouldNotReachHere();
2235       }
2236 #else
2237       jint c_lo = right->as_constant_ptr()->as_jint_lo();
2238       jint c_hi = right->as_constant_ptr()->as_jint_hi();
2239       switch (code) {
2240         case lir_add:
2241           __ addptr(lreg_lo, c_lo);
2242           __ adcl(lreg_hi, c_hi);
2243           break;
2244         case lir_sub:
2245           __ subptr(lreg_lo, c_lo);
2246           __ sbbl(lreg_hi, c_hi);
2247           break;
2248         default:
2249           ShouldNotReachHere();
2250       }
2251 #endif // _LP64
2252 
2253     } else {
2254       ShouldNotReachHere();
2255     }
2256 
2257   } else if (left->is_single_xmm()) {
2258     assert(left == dest, "left and dest must be equal");
2259     XMMRegister lreg = left->as_xmm_float_reg();
2260 
2261     if (right->is_single_xmm()) {
2262       XMMRegister rreg = right->as_xmm_float_reg();
2263       switch (code) {
2264         case lir_add: __ addss(lreg, rreg);  break;
2265         case lir_sub: __ subss(lreg, rreg);  break;
2266         case lir_mul_strictfp: // fall through
2267         case lir_mul: __ mulss(lreg, rreg);  break;
2268         case lir_div_strictfp: // fall through
2269         case lir_div: __ divss(lreg, rreg);  break;
2270         default: ShouldNotReachHere();
2271       }
2272     } else {
2273       Address raddr;
2274       if (right->is_single_stack()) {
2275         raddr = frame_map()->address_for_slot(right->single_stack_ix());
2276       } else if (right->is_constant()) {
2277         // hack for now
2278         raddr = __ as_Address(InternalAddress(float_constant(right->as_jfloat())));
2279       } else {
2280         ShouldNotReachHere();
2281       }
2282       switch (code) {
2283         case lir_add: __ addss(lreg, raddr);  break;
2284         case lir_sub: __ subss(lreg, raddr);  break;
2285         case lir_mul_strictfp: // fall through
2286         case lir_mul: __ mulss(lreg, raddr);  break;
2287         case lir_div_strictfp: // fall through
2288         case lir_div: __ divss(lreg, raddr);  break;
2289         default: ShouldNotReachHere();
2290       }
2291     }
2292 
2293   } else if (left->is_double_xmm()) {
2294     assert(left == dest, "left and dest must be equal");
2295 
2296     XMMRegister lreg = left->as_xmm_double_reg();
2297     if (right->is_double_xmm()) {
2298       XMMRegister rreg = right->as_xmm_double_reg();
2299       switch (code) {
2300         case lir_add: __ addsd(lreg, rreg);  break;
2301         case lir_sub: __ subsd(lreg, rreg);  break;
2302         case lir_mul_strictfp: // fall through
2303         case lir_mul: __ mulsd(lreg, rreg);  break;
2304         case lir_div_strictfp: // fall through
2305         case lir_div: __ divsd(lreg, rreg);  break;
2306         default: ShouldNotReachHere();
2307       }
2308     } else {
2309       Address raddr;
2310       if (right->is_double_stack()) {
2311         raddr = frame_map()->address_for_slot(right->double_stack_ix());
2312       } else if (right->is_constant()) {
2313         // hack for now
2314         raddr = __ as_Address(InternalAddress(double_constant(right->as_jdouble())));
2315       } else {
2316         ShouldNotReachHere();
2317       }
2318       switch (code) {
2319         case lir_add: __ addsd(lreg, raddr);  break;
2320         case lir_sub: __ subsd(lreg, raddr);  break;
2321         case lir_mul_strictfp: // fall through
2322         case lir_mul: __ mulsd(lreg, raddr);  break;
2323         case lir_div_strictfp: // fall through
2324         case lir_div: __ divsd(lreg, raddr);  break;
2325         default: ShouldNotReachHere();
2326       }
2327     }
2328 
2329   } else if (left->is_single_fpu()) {
2330     assert(dest->is_single_fpu(),  "fpu stack allocation required");
2331 
2332     if (right->is_single_fpu()) {
2333       arith_fpu_implementation(code, left->fpu_regnr(), right->fpu_regnr(), dest->fpu_regnr(), pop_fpu_stack);
2334 
2335     } else {
2336       assert(left->fpu_regnr() == 0, "left must be on TOS");
2337       assert(dest->fpu_regnr() == 0, "dest must be on TOS");
2338 
2339       Address raddr;
2340       if (right->is_single_stack()) {
2341         raddr = frame_map()->address_for_slot(right->single_stack_ix());
2342       } else if (right->is_constant()) {
2343         address const_addr = float_constant(right->as_jfloat());
        assert(const_addr != NULL, "incorrect float/double constant maintenance");
2345         // hack for now
2346         raddr = __ as_Address(InternalAddress(const_addr));
2347       } else {
2348         ShouldNotReachHere();
2349       }
2350 
2351       switch (code) {
2352         case lir_add: __ fadd_s(raddr); break;
2353         case lir_sub: __ fsub_s(raddr); break;
2354         case lir_mul_strictfp: // fall through
2355         case lir_mul: __ fmul_s(raddr); break;
2356         case lir_div_strictfp: // fall through
2357         case lir_div: __ fdiv_s(raddr); break;
2358         default:      ShouldNotReachHere();
2359       }
2360     }
2361 
2362   } else if (left->is_double_fpu()) {
2363     assert(dest->is_double_fpu(),  "fpu stack allocation required");
2364 
2365     if (code == lir_mul_strictfp || code == lir_div_strictfp) {
2366       // Double values require special handling for strictfp mul/div on x86
2367       __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
2368       __ fmulp(left->fpu_regnrLo() + 1);
2369     }
2370 
2371     if (right->is_double_fpu()) {
2372       arith_fpu_implementation(code, left->fpu_regnrLo(), right->fpu_regnrLo(), dest->fpu_regnrLo(), pop_fpu_stack);
2373 
2374     } else {
2375       assert(left->fpu_regnrLo() == 0, "left must be on TOS");
2376       assert(dest->fpu_regnrLo() == 0, "dest must be on TOS");
2377 
2378       Address raddr;
2379       if (right->is_double_stack()) {
2380         raddr = frame_map()->address_for_slot(right->double_stack_ix());
2381       } else if (right->is_constant()) {
2382         // hack for now
2383         raddr = __ as_Address(InternalAddress(double_constant(right->as_jdouble())));
2384       } else {
2385         ShouldNotReachHere();
2386       }
2387 
2388       switch (code) {
2389         case lir_add: __ fadd_d(raddr); break;
2390         case lir_sub: __ fsub_d(raddr); break;
2391         case lir_mul_strictfp: // fall through
2392         case lir_mul: __ fmul_d(raddr); break;
2393         case lir_div_strictfp: // fall through
2394         case lir_div: __ fdiv_d(raddr); break;
2395         default: ShouldNotReachHere();
2396       }
2397     }
2398 
2399     if (code == lir_mul_strictfp || code == lir_div_strictfp) {
2400       // Double values require special handling for strictfp mul/div on x86
2401       __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
2402       __ fmulp(dest->fpu_regnrLo() + 1);
2403     }
2404 
2405   } else if (left->is_single_stack() || left->is_address()) {
2406     assert(left == dest, "left and dest must be equal");
2407 
2408     Address laddr;
2409     if (left->is_single_stack()) {
2410       laddr = frame_map()->address_for_slot(left->single_stack_ix());
2411     } else if (left->is_address()) {
2412       laddr = as_Address(left->as_address_ptr());
2413     } else {
2414       ShouldNotReachHere();
2415     }
2416 
2417     if (right->is_single_cpu()) {
2418       Register rreg = right->as_register();
2419       switch (code) {
2420         case lir_add: __ addl(laddr, rreg); break;
2421         case lir_sub: __ subl(laddr, rreg); break;
2422         default:      ShouldNotReachHere();
2423       }
2424     } else if (right->is_constant()) {
2425       jint c = right->as_constant_ptr()->as_jint();
2426       switch (code) {
2427         case lir_add: {
2428           __ incrementl(laddr, c);
2429           break;
2430         }
2431         case lir_sub: {
2432           __ decrementl(laddr, c);
2433           break;
2434         }
2435         default: ShouldNotReachHere();
2436       }
2437     } else {
2438       ShouldNotReachHere();
2439     }
2440 
2441   } else {
2442     ShouldNotReachHere();
2443   }
2444 }
2445 
2446 void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) {
2447   assert(pop_fpu_stack  || (left_index     == dest_index || right_index     == dest_index), "invalid LIR");
2448   assert(!pop_fpu_stack || (left_index - 1 == dest_index || right_index - 1 == dest_index), "invalid LIR");
2449   assert(left_index == 0 || right_index == 0, "either must be on top of stack");
2450 
2451   bool left_is_tos = (left_index == 0);
2452   bool dest_is_tos = (dest_index == 0);
2453   int non_tos_index = (left_is_tos ? right_index : left_index);
2454 
2455   switch (code) {
2456     case lir_add:
2457       if (pop_fpu_stack)       __ faddp(non_tos_index);
2458       else if (dest_is_tos)    __ fadd (non_tos_index);
2459       else                     __ fadda(non_tos_index);
2460       break;
2461 
2462     case lir_sub:
2463       if (left_is_tos) {
2464         if (pop_fpu_stack)     __ fsubrp(non_tos_index);
2465         else if (dest_is_tos)  __ fsub  (non_tos_index);
2466         else                   __ fsubra(non_tos_index);
2467       } else {
2468         if (pop_fpu_stack)     __ fsubp (non_tos_index);
2469         else if (dest_is_tos)  __ fsubr (non_tos_index);
2470         else                   __ fsuba (non_tos_index);
2471       }
2472       break;
2473 
2474     case lir_mul_strictfp: // fall through
2475     case lir_mul:
2476       if (pop_fpu_stack)       __ fmulp(non_tos_index);
2477       else if (dest_is_tos)    __ fmul (non_tos_index);
2478       else                     __ fmula(non_tos_index);
2479       break;
2480 
2481     case lir_div_strictfp: // fall through
2482     case lir_div:
2483       if (left_is_tos) {
2484         if (pop_fpu_stack)     __ fdivrp(non_tos_index);
2485         else if (dest_is_tos)  __ fdiv  (non_tos_index);
2486         else                   __ fdivra(non_tos_index);
2487       } else {
2488         if (pop_fpu_stack)     __ fdivp (non_tos_index);
2489         else if (dest_is_tos)  __ fdivr (non_tos_index);
2490         else                   __ fdiva (non_tos_index);
2491       }
2492       break;
2493 
2494     case lir_rem:
2495       assert(left_is_tos && dest_is_tos && right_index == 1, "must be guaranteed by FPU stack allocation");
2496       __ fremr(noreg);
2497       break;
2498 
2499     default:
2500       ShouldNotReachHere();
2501   }
2502 }
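// Mapping to raw x87 (a sketch of this assembler's naming convention, using
// fadd as the example): no suffix = "fadd st(0), st(i)" (result to TOS),
// trailing 'a' = "fadd st(i), st(0)" (accumulate into st(i)), trailing 'p' =
// "faddp st(i), st(0)" (accumulate and pop). The reversed forms fsubr*/fdivr*
// exist because subtraction and division are not commutative and either
// operand may be the one sitting on TOS.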
2503 
2504 
2505 void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
2506   if (value->is_double_xmm()) {
2507     switch(code) {
2508       case lir_abs :
2509         {
2510           if (dest->as_xmm_double_reg() != value->as_xmm_double_reg()) {
2511             __ movdbl(dest->as_xmm_double_reg(), value->as_xmm_double_reg());
2512           }
2513           __ andpd(dest->as_xmm_double_reg(),
2514                     ExternalAddress((address)double_signmask_pool));
2515         }
2516         break;
2517 
2518       case lir_sqrt: __ sqrtsd(dest->as_xmm_double_reg(), value->as_xmm_double_reg()); break;
2519       // all other intrinsics are not available in the SSE instruction set, so FPU is used
2520       default      : ShouldNotReachHere();
2521     }
2522 
2523   } else if (value->is_double_fpu()) {
2524     assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
2525     switch(code) {
2526       case lir_log   : __ flog() ; break;
2527       case lir_log10 : __ flog10() ; break;
2528       case lir_abs   : __ fabs() ; break;
2529       case lir_sqrt  : __ fsqrt(); break;
2530       case lir_sin   :
        // Should consider not saving rbx if not necessary
2532         __ trigfunc('s', op->as_Op2()->fpu_stack_size());
2533         break;
2534       case lir_cos :
        // Should consider not saving rbx if not necessary
2536         assert(op->as_Op2()->fpu_stack_size() <= 6, "sin and cos need two free stack slots");
2537         __ trigfunc('c', op->as_Op2()->fpu_stack_size());
2538         break;
2539       case lir_tan :
        // Should consider not saving rbx if not necessary
2541         __ trigfunc('t', op->as_Op2()->fpu_stack_size());
2542         break;
2543       case lir_exp :
2544         __ exp_with_fallback(op->as_Op2()->fpu_stack_size());
2545         break;
2546       case lir_pow :
2547         __ pow_with_fallback(op->as_Op2()->fpu_stack_size());
2548         break;
2549       default      : ShouldNotReachHere();
2550     }
2551   } else {
2552     Unimplemented();
2553   }
2554 }
2555 
2556 void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
2557   // assert(left->destroys_register(), "check");
2558   if (left->is_single_cpu()) {
2559     Register reg = left->as_register();
2560     if (right->is_constant()) {
2561       int val = right->as_constant_ptr()->as_jint();
2562       switch (code) {
2563         case lir_logic_and: __ andl (reg, val); break;
2564         case lir_logic_or:  __ orl  (reg, val); break;
2565         case lir_logic_xor: __ xorl (reg, val); break;
2566         default: ShouldNotReachHere();
2567       }
2568     } else if (right->is_stack()) {
2569       // added support for stack operands
2570       Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
2571       switch (code) {
2572         case lir_logic_and: __ andl (reg, raddr); break;
2573         case lir_logic_or:  __ orl  (reg, raddr); break;
2574         case lir_logic_xor: __ xorl (reg, raddr); break;
2575         default: ShouldNotReachHere();
2576       }
2577     } else {
2578       Register rright = right->as_register();
2579       switch (code) {
2580         case lir_logic_and: __ andptr (reg, rright); break;
2581         case lir_logic_or : __ orptr  (reg, rright); break;
2582         case lir_logic_xor: __ xorptr (reg, rright); break;
2583         default: ShouldNotReachHere();
2584       }
2585     }
2586     move_regs(reg, dst->as_register());
2587   } else {
2588     Register l_lo = left->as_register_lo();
2589     Register l_hi = left->as_register_hi();
2590     if (right->is_constant()) {
2591 #ifdef _LP64
2592       __ mov64(rscratch1, right->as_constant_ptr()->as_jlong());
2593       switch (code) {
2594         case lir_logic_and:
2595           __ andq(l_lo, rscratch1);
2596           break;
2597         case lir_logic_or:
2598           __ orq(l_lo, rscratch1);
2599           break;
2600         case lir_logic_xor:
2601           __ xorq(l_lo, rscratch1);
2602           break;
2603         default: ShouldNotReachHere();
2604       }
2605 #else
2606       int r_lo = right->as_constant_ptr()->as_jint_lo();
2607       int r_hi = right->as_constant_ptr()->as_jint_hi();
2608       switch (code) {
2609         case lir_logic_and:
2610           __ andl(l_lo, r_lo);
2611           __ andl(l_hi, r_hi);
2612           break;
2613         case lir_logic_or:
2614           __ orl(l_lo, r_lo);
2615           __ orl(l_hi, r_hi);
2616           break;
2617         case lir_logic_xor:
2618           __ xorl(l_lo, r_lo);
2619           __ xorl(l_hi, r_hi);
2620           break;
2621         default: ShouldNotReachHere();
2622       }
2623 #endif // _LP64
2624     } else {
2625 #ifdef _LP64
2626       Register r_lo;
2627       if (right->type() == T_OBJECT || right->type() == T_ARRAY) {
2628         r_lo = right->as_register();
2629       } else {
2630         r_lo = right->as_register_lo();
2631       }
2632 #else
2633       Register r_lo = right->as_register_lo();
2634       Register r_hi = right->as_register_hi();
2635       assert(l_lo != r_hi, "overwriting registers");
2636 #endif
2637       switch (code) {
2638         case lir_logic_and:
2639           __ andptr(l_lo, r_lo);
2640           NOT_LP64(__ andptr(l_hi, r_hi);)
2641           break;
2642         case lir_logic_or:
2643           __ orptr(l_lo, r_lo);
2644           NOT_LP64(__ orptr(l_hi, r_hi);)
2645           break;
2646         case lir_logic_xor:
2647           __ xorptr(l_lo, r_lo);
2648           NOT_LP64(__ xorptr(l_hi, r_hi);)
2649           break;
2650         default: ShouldNotReachHere();
2651       }
2652     }
2653 
2654     Register dst_lo = dst->as_register_lo();
2655     Register dst_hi = dst->as_register_hi();
2656 
2657 #ifdef _LP64
2658     move_regs(l_lo, dst_lo);
2659 #else
2660     if (dst_lo == l_hi) {
2661       assert(dst_hi != l_lo, "overwriting registers");
2662       move_regs(l_hi, dst_hi);
2663       move_regs(l_lo, dst_lo);
2664     } else {
2665       assert(dst_lo != l_hi, "overwriting registers");
2666       move_regs(l_lo, dst_lo);
2667       move_regs(l_hi, dst_hi);
2668     }
2669 #endif // _LP64
2670   }
2671 }
2672 
2673 
// We assume that rax and rdx can be overwritten
2675 void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {
2676 
2677   assert(left->is_single_cpu(),   "left must be register");
2678   assert(right->is_single_cpu() || right->is_constant(),  "right must be register or constant");
2679   assert(result->is_single_cpu(), "result must be register");
2680 
2681   //  assert(left->destroys_register(), "check");
2682   //  assert(right->destroys_register(), "check");
2683 
2684   Register lreg = left->as_register();
2685   Register dreg = result->as_register();
2686 
2687   if (right->is_constant()) {
2688     int divisor = right->as_constant_ptr()->as_jint();
2689     assert(divisor > 0 && is_power_of_2(divisor), "must be");
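    // Worked example for divisor == 8 (a sketch of the two idioms below):
    // idiv: cdq makes rdx = x >> 31 (0 or -1); "and rdx, 7" turns that into a
    // bias of 0 (x >= 0) or 7 (x < 0); adding the bias before "sar 3" converts
    // the shift's round-toward-negative-infinity into Java's round-toward-zero,
    // e.g. x = -13: (-13 + 7) >> 3 == -1 == -13 / 8. (For divisor == 2 the bias
    // is the sign bit itself, hence the subl of rdx below.)
    // irem: x & 0x80000007 keeps the remainder bits plus the sign; for x = -13
    // that is 0x80000003 (negative), and the dec/or/inc sequence sign-extends
    // the remainder: -13 % 8 == -5. A negative x with zero remainder wraps
    // cleanly: dec -> 0x7FFFFFFF, or -> 0xFFFFFFFF, inc -> 0.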
2690     if (code == lir_idiv) {
      assert(lreg == rax, "must be rax");
2692       assert(temp->as_register() == rdx, "tmp register must be rdx");
2693       __ cdql(); // sign extend into rdx:rax
2694       if (divisor == 2) {
2695         __ subl(lreg, rdx);
2696       } else {
2697         __ andl(rdx, divisor - 1);
2698         __ addl(lreg, rdx);
2699       }
2700       __ sarl(lreg, log2_intptr(divisor));
2701       move_regs(lreg, dreg);
2702     } else if (code == lir_irem) {
2703       Label done;
2704       __ mov(dreg, lreg);
2705       __ andl(dreg, 0x80000000 | (divisor - 1));
2706       __ jcc(Assembler::positive, done);
2707       __ decrement(dreg);
2708       __ orl(dreg, ~(divisor - 1));
2709       __ increment(dreg);
2710       __ bind(done);
2711     } else {
2712       ShouldNotReachHere();
2713     }
2714   } else {
2715     Register rreg = right->as_register();
    assert(lreg == rax, "left register must be rax");
2717     assert(rreg != rdx, "right register must not be rdx");
2718     assert(temp->as_register() == rdx, "tmp register must be rdx");
2719 
2720     move_regs(lreg, rax);
2721 
2722     int idivl_offset = __ corrected_idivl(rreg);
2723     add_debug_info_for_div0(idivl_offset, info);
2724     if (code == lir_irem) {
2725       move_regs(rdx, dreg); // result is in rdx
2726     } else {
2727       move_regs(rax, dreg);
2728     }
2729   }
2730 }
2731 
2732 
2733 void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
2734   if (opr1->is_single_cpu()) {
2735     Register reg1 = opr1->as_register();
2736     if (opr2->is_single_cpu()) {
2737       // cpu register - cpu register
2738       if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
2739         __ cmpptr(reg1, opr2->as_register());
2740       } else {
2741         assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?");
2742         __ cmpl(reg1, opr2->as_register());
2743       }
2744     } else if (opr2->is_stack()) {
2745       // cpu register - stack
2746       if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
2747         __ cmpptr(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
2748       } else {
2749         __ cmpl(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
2750       }
2751     } else if (opr2->is_constant()) {
2752       // cpu register - constant
2753       LIR_Const* c = opr2->as_constant_ptr();
2754       if (c->type() == T_INT) {
2755         __ cmpl(reg1, c->as_jint());
2756       } else if (c->type() == T_OBJECT || c->type() == T_ARRAY) {
        // On 64-bit, oops fit in a single register
2758         jobject o = c->as_jobject();
2759         if (o == NULL) {
2760           __ cmpptr(reg1, (int32_t)NULL_WORD);
2761         } else {
2762 #ifdef _LP64
2763           __ movoop(rscratch1, o);
2764           __ cmpptr(reg1, rscratch1);
2765 #else
2766           __ cmpoop(reg1, c->as_jobject());
2767 #endif // _LP64
2768         }
2769       } else {
2770         fatal(err_msg("unexpected type: %s", basictype_to_str(c->type())));
2771       }
2772       // cpu register - address
2773     } else if (opr2->is_address()) {
2774       if (op->info() != NULL) {
2775         add_debug_info_for_null_check_here(op->info());
2776       }
2777       __ cmpl(reg1, as_Address(opr2->as_address_ptr()));
2778     } else {
2779       ShouldNotReachHere();
2780     }
2781 
2782   } else if(opr1->is_double_cpu()) {
2783     Register xlo = opr1->as_register_lo();
2784     Register xhi = opr1->as_register_hi();
2785     if (opr2->is_double_cpu()) {
2786 #ifdef _LP64
2787       __ cmpptr(xlo, opr2->as_register_lo());
2788 #else
2789       // cpu register - cpu register
2790       Register ylo = opr2->as_register_lo();
2791       Register yhi = opr2->as_register_hi();
2792       __ subl(xlo, ylo);
2793       __ sbbl(xhi, yhi);
2794       if (condition == lir_cond_equal || condition == lir_cond_notEqual) {
2795         __ orl(xhi, xlo);
2796       }
2797 #endif // _LP64
2798     } else if (opr2->is_constant()) {
2799       // cpu register - constant 0
2800       assert(opr2->as_jlong() == (jlong)0, "only handles zero");
2801 #ifdef _LP64
2802       __ cmpptr(xlo, (int32_t)opr2->as_jlong());
2803 #else
2804       assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "only handles equals case");
2805       __ orl(xhi, xlo);
2806 #endif // _LP64
2807     } else {
2808       ShouldNotReachHere();
2809     }
2810 
2811   } else if (opr1->is_single_xmm()) {
2812     XMMRegister reg1 = opr1->as_xmm_float_reg();
2813     if (opr2->is_single_xmm()) {
2814       // xmm register - xmm register
2815       __ ucomiss(reg1, opr2->as_xmm_float_reg());
2816     } else if (opr2->is_stack()) {
2817       // xmm register - stack
2818       __ ucomiss(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
2819     } else if (opr2->is_constant()) {
2820       // xmm register - constant
2821       __ ucomiss(reg1, InternalAddress(float_constant(opr2->as_jfloat())));
2822     } else if (opr2->is_address()) {
2823       // xmm register - address
2824       if (op->info() != NULL) {
2825         add_debug_info_for_null_check_here(op->info());
2826       }
2827       __ ucomiss(reg1, as_Address(opr2->as_address_ptr()));
2828     } else {
2829       ShouldNotReachHere();
2830     }
2831 
2832   } else if (opr1->is_double_xmm()) {
2833     XMMRegister reg1 = opr1->as_xmm_double_reg();
2834     if (opr2->is_double_xmm()) {
2835       // xmm register - xmm register
2836       __ ucomisd(reg1, opr2->as_xmm_double_reg());
2837     } else if (opr2->is_stack()) {
2838       // xmm register - stack
2839       __ ucomisd(reg1, frame_map()->address_for_slot(opr2->double_stack_ix()));
2840     } else if (opr2->is_constant()) {
2841       // xmm register - constant
2842       __ ucomisd(reg1, InternalAddress(double_constant(opr2->as_jdouble())));
2843     } else if (opr2->is_address()) {
2844       // xmm register - address
2845       if (op->info() != NULL) {
2846         add_debug_info_for_null_check_here(op->info());
2847       }
2848       __ ucomisd(reg1, as_Address(opr2->pointer()->as_address()));
2849     } else {
2850       ShouldNotReachHere();
2851     }
2852 
2853   } else if(opr1->is_single_fpu() || opr1->is_double_fpu()) {
2854     assert(opr1->is_fpu_register() && opr1->fpu() == 0, "currently left-hand side must be on TOS (relax this restriction)");
2855     assert(opr2->is_fpu_register(), "both must be registers");
2856     __ fcmp(noreg, opr2->fpu(), op->fpu_pop_count() > 0, op->fpu_pop_count() > 1);
2857 
2858   } else if (opr1->is_address() && opr2->is_constant()) {
2859     LIR_Const* c = opr2->as_constant_ptr();
2860 #ifdef _LP64
2861     if (c->type() == T_OBJECT || c->type() == T_ARRAY) {
2862       assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "need to reverse");
2863       __ movoop(rscratch1, c->as_jobject());
2864     }
2865 #endif // LP64
2866     if (op->info() != NULL) {
2867       add_debug_info_for_null_check_here(op->info());
2868     }
2869     // special case: address - constant
2870     LIR_Address* addr = opr1->as_address_ptr();
2871     if (c->type() == T_INT) {
2872       __ cmpl(as_Address(addr), c->as_jint());
2873     } else if (c->type() == T_OBJECT || c->type() == T_ARRAY) {
2874 #ifdef _LP64
2875       // %%% Give noreg as the temp for as_Address so that this explodes if
2876       // addr isn't reachable, until we figure out a better strategy.
2877       __ cmpptr(rscratch1, as_Address(addr, noreg));
2878 #else
2879       __ cmpoop(as_Address(addr), c->as_jobject());
2880 #endif // _LP64
2881     } else {
2882       ShouldNotReachHere();
2883     }
2884 
2885   } else {
2886     ShouldNotReachHere();
2887   }
2888 }
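
     // Note on the ucomiss/ucomisd compares above: they perform an unordered
     // floating point compare and set ZF/PF/CF, so consumers must treat NaN
     // specially. A sketch of the flag outcomes (per the Intel SDM):
     //
     //   reg1 <  op2 : ZF=0, PF=0, CF=1
     //   reg1 == op2 : ZF=1, PF=0, CF=0
     //   reg1 >  op2 : ZF=0, PF=0, CF=0
     //   unordered   : ZF=1, PF=1, CF=1   (either operand is NaN)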
2889 
2890 void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) {
2891   if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
2892     if (left->is_single_xmm()) {
2893       assert(right->is_single_xmm(), "must match");
2894       __ cmpss2int(left->as_xmm_float_reg(), right->as_xmm_float_reg(), dst->as_register(), code == lir_ucmp_fd2i);
2895     } else if (left->is_double_xmm()) {
2896       assert(right->is_double_xmm(), "must match");
2897       __ cmpsd2int(left->as_xmm_double_reg(), right->as_xmm_double_reg(), dst->as_register(), code == lir_ucmp_fd2i);
2898 
2899     } else {
2900       assert(left->is_single_fpu() || left->is_double_fpu(), "must be");
2901       assert(right->is_single_fpu() || right->is_double_fpu(), "must match");
2902 
2903       assert(left->fpu() == 0, "left must be on TOS");
2904       __ fcmp2int(dst->as_register(), code == lir_ucmp_fd2i, right->fpu(),
2905                   op->fpu_pop_count() > 0, op->fpu_pop_count() > 1);
2906     }
2907   } else {
2908     assert(code == lir_cmp_l2i, "check");
2909 #ifdef _LP64
2910     Label done;
2911     Register dest = dst->as_register();
2912     __ cmpptr(left->as_register_lo(), right->as_register_lo());
2913     __ movl(dest, -1);
2914     __ jccb(Assembler::less, done);
2915     __ set_byte_if_not_zero(dest);
2916     __ movzbl(dest, dest);
2917     __ bind(done);
2918 #else
2919     __ lcmp2int(left->as_register_hi(),
2920                 left->as_register_lo(),
2921                 right->as_register_hi(),
2922                 right->as_register_lo());
2923     move_regs(left->as_register_hi(), dst->as_register());
2924 #endif // _LP64
2925   }
2926 }
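
     // Note: lir_cmp_l2i above implements the lcmp bytecode contract. A
     // sketch of the equivalent C expression:
     //
     //   result = (x < y) ? -1 : ((x == y) ? 0 : 1);
     //
     // On 64-bit this is built from cmpptr plus a conditional byte set; on
     // 32-bit, lcmp2int compares the hi/lo register pairs.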
2927 
2928 
2929 void LIR_Assembler::align_call(LIR_Code code) {
2930   if (os::is_MP()) {
2931     // make sure that the displacement word of the call ends up word aligned
2932     int offset = __ offset();
2933     switch (code) {
2934       case lir_static_call:
2935       case lir_optvirtual_call:
2936       case lir_dynamic_call:
2937         offset += NativeCall::displacement_offset;
2938         break;
2939       case lir_icvirtual_call:
2940         offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size;
2941         break;
2942       case lir_virtual_call:  // currently, sparc-specific for niagara
2943       default: ShouldNotReachHere();
2944     }
2945     __ align(BytesPerWord, offset);
2946   }
2947 }
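
     // Note: the alignment above matters because these call sites are patched
     // at run time while other threads may be executing them. Keeping the
     // 4-byte displacement word aligned lets the patcher rewrite it with a
     // single atomic store. A sketch of the layout being arranged:
     //
     //   [nops...] E8 dd dd dd dd     // call rel32; the displacement bytes
     //                                // must sit in one aligned machine word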
2948 
2949 
2950 void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
2951   assert(!os::is_MP() || (__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0,
2952          "must be aligned");
2953   __ call(AddressLiteral(op->addr(), rtype));
2954   add_call_info(code_offset(), op->info());
2955 }
2956 
2957 
2958 void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
2959   __ ic_call(op->addr());
2960   add_call_info(code_offset(), op->info());
2961   assert(!os::is_MP() ||
2962          (__ offset() - NativeCall::instruction_size + NativeCall::displacement_offset) % BytesPerWord == 0,
2963          "must be aligned");
2964 }
2965 
2966 
2967 /* Currently, vtable-dispatch is only enabled for sparc platforms */
2968 void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
2969   ShouldNotReachHere();
2970 }
2971 
2972 
2973 void LIR_Assembler::emit_static_call_stub() {
2974   address call_pc = __ pc();
2975   address stub = __ start_a_stub(call_stub_size);
2976   if (stub == NULL) {
2977     bailout("static call stub overflow");
2978     return;
2979   }
2980 
2981   int start = __ offset();
2982   if (os::is_MP()) {
2983     // make sure that the displacement word of the call ends up word aligned
2984     __ align(BytesPerWord, __ offset() + NativeMovConstReg::instruction_size + NativeCall::displacement_offset);
2985   }
2986   __ relocate(static_stub_Relocation::spec(call_pc));
2987   __ mov_metadata(rbx, (Metadata*)NULL);
2988   // must be set to -1 at code generation time
2989   assert(!os::is_MP() || ((__ offset() + 1) % BytesPerWord) == 0, "must be aligned on MP");
2990   // On 64-bit this will die, since it would take a movq & jmp; it must be only a jmp
2991   __ jump(RuntimeAddress(__ pc()));
2992 
2993   assert(__ offset() - start <= call_stub_size, "stub too big");
2994   __ end_a_stub();
2995 }
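
     // Note: a sketch of the stub just emitted (encodings are illustrative,
     // not exact):
     //
     //   mov  rbx, <Metadata*>   // NativeMovConstReg; patched on resolution
     //   jmp  <entry>            // initially self-referential; patched to
     //                           // the resolved callee entry point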
2996 
2997 
2998 void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
2999   assert(exceptionOop->as_register() == rax, "must match");
3000   assert(exceptionPC->as_register() == rdx, "must match");
3001 
3002   // exception object is not added to oop map by LinearScan
3003   // (LinearScan assumes that no oops are in fixed registers)
3004   info->add_register_oop(exceptionOop);
3005   Runtime1::StubID unwind_id;
3006 
3007   // get current pc information
3008   // pc is only needed if the method has an exception handler; the unwind code does not need it.
3009   int pc_for_athrow_offset = __ offset();
3010   InternalAddress pc_for_athrow(__ pc());
3011   __ lea(exceptionPC->as_register(), pc_for_athrow);
3012   add_call_info(pc_for_athrow_offset, info); // for exception handler
3013 
3014   __ verify_not_null_oop(rax);
3015   // search an exception handler (rax: exception oop, rdx: throwing pc)
3016   if (compilation()->has_fpu_code()) {
3017     unwind_id = Runtime1::handle_exception_id;
3018   } else {
3019     unwind_id = Runtime1::handle_exception_nofpu_id;
3020   }
3021   __ call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
3022 
3023   // enough room for a two-byte trap
3024   __ nop();
3025 }
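
     // Note: the lea of pc_for_athrow above is how the throwing pc reaches
     // the runtime: InternalAddress(__ pc()) names this exact code position,
     // so rdx ends up holding an address that the debug info recorded at
     // pc_for_athrow_offset maps back to the athrow bytecode.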
3026 
3027 
3028 void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
3029   assert(exceptionOop->as_register() == rax, "must match");
3030 
3031   __ jmp(_unwind_handler_entry);
3032 }
3033 
3034 
3035 void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
3036 
3037   // optimized version for linear scan:
3038   // * count must already be in ECX (guaranteed by LinearScan)
3039   // * left and dest must be equal
3040   // * tmp must be unused
3041   assert(count->as_register() == SHIFT_count, "count must be in ECX");
3042   assert(left == dest, "left and dest must be equal");
3043   assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
3044 
3045   if (left->is_single_cpu()) {
3046     Register value = left->as_register();
3047     assert(value != SHIFT_count, "left cannot be ECX");
3048 
3049     switch (code) {
3050       case lir_shl:  __ shll(value); break;
3051       case lir_shr:  __ sarl(value); break;
3052       case lir_ushr: __ shrl(value); break;
3053       default: ShouldNotReachHere();
3054     }
3055   } else if (left->is_double_cpu()) {
3056     Register lo = left->as_register_lo();
3057     Register hi = left->as_register_hi();
3058     assert(lo != SHIFT_count && hi != SHIFT_count, "left cannot be ECX");
3059 #ifdef _LP64
3060     switch (code) {
3061       case lir_shl:  __ shlptr(lo);        break;
3062       case lir_shr:  __ sarptr(lo);        break;
3063       case lir_ushr: __ shrptr(lo);        break;
3064       default: ShouldNotReachHere();
3065     }
3066 #else
3067 
3068     switch (code) {
3069       case lir_shl:  __ lshl(hi, lo);        break;
3070       case lir_shr:  __ lshr(hi, lo, true);  break;
3071       case lir_ushr: __ lshr(hi, lo, false); break;
3072       default: ShouldNotReachHere();
3073     }
3074 #endif // _LP64
3075   } else {
3076     ShouldNotReachHere();
3077   }
3078 }
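
     // Note: x86 variable shifts hard-wire the shift count to CL, which is
     // why LinearScan must allocate the count to ECX/RCX. A sketch of the
     // emitted form for lir_shl on a single cpu register:
     //
     //   shll(value)       // emits "shl value, cl" (count in CL, Intel syntax)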
3079 
3080 
3081 void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
3082   if (dest->is_single_cpu()) {
3083     // first move left into dest so that left is not destroyed by the shift
3084     Register value = dest->as_register();
3085     count = count & 0x1F; // Java spec
3086 
3087     move_regs(left->as_register(), value);
3088     switch (code) {
3089       case lir_shl:  __ shll(value, count); break;
3090       case lir_shr:  __ sarl(value, count); break;
3091       case lir_ushr: __ shrl(value, count); break;
3092       default: ShouldNotReachHere();
3093     }
3094   } else if (dest->is_double_cpu()) {
3095 #ifndef _LP64
3096     Unimplemented();
3097 #else
3098     // first move left into dest so that left is not destroyed by the shift
3099     Register value = dest->as_register_lo();
3100     count = count & 0x1F; // Java spec
3101 
3102     move_regs(left->as_register_lo(), value);
3103     switch (code) {
3104       case lir_shl:  __ shlptr(value, count); break;
3105       case lir_shr:  __ sarptr(value, count); break;
3106       case lir_ushr: __ shrptr(value, count); break;
3107       default: ShouldNotReachHere();
3108     }
3109 #endif // _LP64
3110   } else {
3111     ShouldNotReachHere();
3112   }
3113 }
3114 
3115 
3116 void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) {
3117   assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
3118   int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
3119   assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
3120   __ movptr (Address(rsp, offset_from_rsp_in_bytes), r);
3121 }
3122 
3123 
3124 void LIR_Assembler::store_parameter(jint c,     int offset_from_rsp_in_words) {
3125   assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
3126   int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
3127   assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
3128   __ movptr (Address(rsp, offset_from_rsp_in_bytes), c);
3129 }
3130 
3131 
3132 void LIR_Assembler::store_parameter(jobject o,  int offset_from_rsp_in_words) {
3133   assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
3134   int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
3135   assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
3136   __ movoop (Address(rsp, offset_from_rsp_in_bytes), o);
3137 }
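
     // Note: the three store_parameter overloads above spill outgoing stub
     // arguments into the frame's reserved argument area, addressed upward
     // from rsp in word-sized slots:
     //
     //   [rsp + 0*BytesPerWord]   parameter 0
     //   [rsp + 1*BytesPerWord]   parameter 1
     //   ...                      up to reserved_argument_area_size()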
3138 
3139 
3140 // This code replaces a call to arraycopy; no exceptions may be
3141 // thrown in this code, they must be thrown in the System.arraycopy
3142 // activation frame. We could save some checks if this were not the case.
3143 void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
3144   ciArrayKlass* default_type = op->expected_type();
3145   Register src = op->src()->as_register();
3146   Register dst = op->dst()->as_register();
3147   Register src_pos = op->src_pos()->as_register();
3148   Register dst_pos = op->dst_pos()->as_register();
3149   Register length  = op->length()->as_register();
3150   Register tmp = op->tmp()->as_register();
3151 
3152   CodeStub* stub = op->stub();
3153   int flags = op->flags();
3154   BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
3155   if (basic_type == T_ARRAY) basic_type = T_OBJECT;
3156 
3157   // if we don't know anything, just go through the generic arraycopy
3158   if (default_type == NULL) {
3159     Label done;
3160     // save outgoing arguments on stack in case call to System.arraycopy is needed
3161     // HACK ALERT. This code used to push the parameters in a hardwired fashion
3162     // for interpreter calling conventions. Now we have to do it in the new style conventions.
3163     // For the moment, until C1 gets the new register allocator, we just force all the
3164     // args to the right place (except the register args) and then on the back side
3165     // reload the register args properly if we take the slow path. Yuck.
3166 
3167     // These are proper for the calling convention
3168     store_parameter(length, 2);
3169     store_parameter(dst_pos, 1);
3170     store_parameter(dst, 0);
3171 
3172     // these are just temporary placements until we need to reload
3173     store_parameter(src_pos, 3);
3174     store_parameter(src, 4);
3175     NOT_LP64(assert(src == rcx && src_pos == rdx, "mismatch in calling convention");)
3176 
3177     address C_entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
3178 
3179     address copyfunc_addr = StubRoutines::generic_arraycopy();
3180 
3181     // pass arguments: may push as this is not a safepoint; SP must be fixed at each safepoint
3182 #ifdef _LP64
3183     // The arguments are in the Java calling convention, so we can trivially
3184     // shift them to the C convention
3185     assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4);
3186     __ mov(c_rarg0, j_rarg0);
3187     assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4);
3188     __ mov(c_rarg1, j_rarg1);
3189     assert_different_registers(c_rarg2, j_rarg3, j_rarg4);
3190     __ mov(c_rarg2, j_rarg2);
3191     assert_different_registers(c_rarg3, j_rarg4);
3192     __ mov(c_rarg3, j_rarg3);
3193 #ifdef _WIN64
3194     // Allocate ABI space for the args, but be sure to keep the stack aligned
3195     __ subptr(rsp, 6*wordSize);
3196     store_parameter(j_rarg4, 4);
3197     if (copyfunc_addr == NULL) { // Use C version if stub was not generated
3198       __ call(RuntimeAddress(C_entry));
3199     } else {
3200 #ifndef PRODUCT
3201       if (PrintC1Statistics) {
3202         __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3203       }
3204 #endif
3205       __ call(RuntimeAddress(copyfunc_addr));
3206     }
3207     __ addptr(rsp, 6*wordSize);
3208 #else
3209     __ mov(c_rarg4, j_rarg4);
3210     if (copyfunc_addr == NULL) { // Use C version if stub was not generated
3211       __ call(RuntimeAddress(C_entry));
3212     } else {
3213 #ifndef PRODUCT
3214       if (PrintC1Statistics) {
3215         __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3216       }
3217 #endif
3218       __ call(RuntimeAddress(copyfunc_addr));
3219     }
3220 #endif // _WIN64
3221 #else
3222     __ push(length);
3223     __ push(dst_pos);
3224     __ push(dst);
3225     __ push(src_pos);
3226     __ push(src);
3227 
3228     if (copyfunc_addr == NULL) { // Use C version if stub was not generated
3229       __ call_VM_leaf(C_entry, 5); // removes pushed parameters from the stack
3230     } else {
3231 #ifndef PRODUCT
3232       if (PrintC1Statistics) {
3233         __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3234       }
3235 #endif
3236       __ call_VM_leaf(copyfunc_addr, 5); // removes pushed parameters from the stack
3237     }
3238 
3239 #endif // _LP64
3240 
3241     __ cmpl(rax, 0);
3242     __ jcc(Assembler::equal, *stub->continuation());
3243 
3244     if (copyfunc_addr != NULL) {
3245       __ mov(tmp, rax);
3246       __ xorl(tmp, -1);
3247     }
3248 
3249     // Reload values from the stack so they are where the stub
3250     // expects them.
3251     __ movptr   (dst,     Address(rsp, 0*BytesPerWord));
3252     __ movptr   (dst_pos, Address(rsp, 1*BytesPerWord));
3253     __ movptr   (length,  Address(rsp, 2*BytesPerWord));
3254     __ movptr   (src_pos, Address(rsp, 3*BytesPerWord));
3255     __ movptr   (src,     Address(rsp, 4*BytesPerWord));
3256 
3257     if (copyfunc_addr != NULL) {
3258       __ subl(length, tmp);
3259       __ addl(src_pos, tmp);
3260       __ addl(dst_pos, tmp);
3261     }
3262     __ jmp(*stub->entry());
3263 
3264     __ bind(*stub->continuation());
3265     return;
3266   }
3267 
3268   assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point");
3269 
3270   int elem_size = type2aelembytes(basic_type);
3271   int shift_amount;
3272   Address::ScaleFactor scale;
3273 
3274   switch (elem_size) {
3275     case 1 :
3276       shift_amount = 0;
3277       scale = Address::times_1;
3278       break;
3279     case 2 :
3280       shift_amount = 1;
3281       scale = Address::times_2;
3282       break;
3283     case 4 :
3284       shift_amount = 2;
3285       scale = Address::times_4;
3286       break;
3287     case 8 :
3288       shift_amount = 3;
3289       scale = Address::times_8;
3290       break;
3291     default:
3292       ShouldNotReachHere();
3293   }
3294 
3295   Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes());
3296   Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes());
3297   Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes());
3298   Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes());
3299 
3300   // length and the position arguments are all sign-extended at this point on 64-bit
3301 
3302   // test for NULL
3303   if (flags & LIR_OpArrayCopy::src_null_check) {
3304     __ testptr(src, src);
3305     __ jcc(Assembler::zero, *stub->entry());
3306   }
3307   if (flags & LIR_OpArrayCopy::dst_null_check) {
3308     __ testptr(dst, dst);
3309     __ jcc(Assembler::zero, *stub->entry());
3310   }
3311 
3312   // check if negative
3313   if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
3314     __ testl(src_pos, src_pos);
3315     __ jcc(Assembler::less, *stub->entry());
3316   }
3317   if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
3318     __ testl(dst_pos, dst_pos);
3319     __ jcc(Assembler::less, *stub->entry());
3320   }
3321 
3322   if (flags & LIR_OpArrayCopy::src_range_check) {
3323     __ lea(tmp, Address(src_pos, length, Address::times_1, 0));
3324     __ cmpl(tmp, src_length_addr);
3325     __ jcc(Assembler::above, *stub->entry());
3326   }
3327   if (flags & LIR_OpArrayCopy::dst_range_check) {
3328     __ lea(tmp, Address(dst_pos, length, Address::times_1, 0));
3329     __ cmpl(tmp, dst_length_addr);
3330     __ jcc(Assembler::above, *stub->entry());
3331   }
3332 
3333   if (flags & LIR_OpArrayCopy::length_positive_check) {
3334     __ testl(length, length);
3335     __ jcc(Assembler::less, *stub->entry());
3336     __ jcc(Assembler::zero, *stub->continuation());
3337   }
3338 
3339 #ifdef _LP64
3340   __ movl2ptr(src_pos, src_pos); // higher 32 bits must be zero
3341   __ movl2ptr(dst_pos, dst_pos); // higher 32 bits must be zero
3342 #endif
3343 
3344   if (flags & LIR_OpArrayCopy::type_check) {
3345     // We don't know whether the array types are compatible
3346     if (basic_type != T_OBJECT) {
3347       // Simple test for basic type arrays
3348       if (UseCompressedClassPointers) {
3349         __ movl(tmp, src_klass_addr);
3350         __ cmpl(tmp, dst_klass_addr);
3351       } else {
3352         __ movptr(tmp, src_klass_addr);
3353         __ cmpptr(tmp, dst_klass_addr);
3354       }
3355       __ jcc(Assembler::notEqual, *stub->entry());
3356     } else {
3357       // For object arrays, if src is a subclass of dst then we can
3358       // safely do the copy.
3359       Label cont, slow;
3360 
3361       __ push(src);
3362       __ push(dst);
3363 
3364       __ load_klass(src, src);
3365       __ load_klass(dst, dst);
3366 
3367       __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL);
3368 
3369       __ push(src);
3370       __ push(dst);
3371       __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
3372       __ pop(dst);
3373       __ pop(src);
3374 
3375       __ cmpl(src, 0);
3376       __ jcc(Assembler::notEqual, cont);
3377 
3378       __ bind(slow);
3379       __ pop(dst);
3380       __ pop(src);
3381 
3382       address copyfunc_addr = StubRoutines::checkcast_arraycopy();
3383       if (copyfunc_addr != NULL) { // use stub if available
3384         // src is not a subclass of dst, so we have to do a
3385         // per-element check.
3386 
3387         int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
3388         if ((flags & mask) != mask) {
3389           // Check at runtime that the one not statically known to be an object array actually is one.
3390           assert(flags & mask, "one of the two should be known to be an object array");
3391 
3392           if (!(flags & LIR_OpArrayCopy::src_objarray)) {
3393             __ load_klass(tmp, src);
3394           } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
3395             __ load_klass(tmp, dst);
3396           }
3397           int lh_offset = in_bytes(Klass::layout_helper_offset());
3398           Address klass_lh_addr(tmp, lh_offset);
3399           jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
3400           __ cmpl(klass_lh_addr, objArray_lh);
3401           __ jcc(Assembler::notEqual, *stub->entry());
3402         }
3403 
3404         // Spill because stubs can use any register they like and it's
3405         // easier to restore just those that we care about.
3406         store_parameter(dst, 0);
3407         store_parameter(dst_pos, 1);
3408         store_parameter(length, 2);
3409         store_parameter(src_pos, 3);
3410         store_parameter(src, 4);
3411 
3412 #ifndef _LP64
3413         __ movptr(tmp, dst_klass_addr);
3414         __ movptr(tmp, Address(tmp, ObjArrayKlass::element_klass_offset()));
3415         __ push(tmp);
3416         __ movl(tmp, Address(tmp, Klass::super_check_offset_offset()));
3417         __ push(tmp);
3418         __ push(length);
3419         __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3420         __ push(tmp);
3421         __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3422         __ push(tmp);
3423 
3424         __ call_VM_leaf(copyfunc_addr, 5);
3425 #else
3426         __ movl2ptr(length, length); // higher 32 bits must be zero
3427 
3428         __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3429         assert_different_registers(c_rarg0, dst, dst_pos, length);
3430         __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3431         assert_different_registers(c_rarg1, dst, length);
3432 
3433         __ mov(c_rarg2, length);
3434         assert_different_registers(c_rarg2, dst);
3435 
3436 #ifdef _WIN64
3437         // Allocate ABI space for the args, but be sure to keep the stack aligned
3438         __ subptr(rsp, 6*wordSize);
3439         __ load_klass(c_rarg3, dst);
3440         __ movptr(c_rarg3, Address(c_rarg3, ObjArrayKlass::element_klass_offset()));
3441         store_parameter(c_rarg3, 4);
3442         __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset()));
3443         __ call(RuntimeAddress(copyfunc_addr));
3444         __ addptr(rsp, 6*wordSize);
3445 #else
3446         __ load_klass(c_rarg4, dst);
3447         __ movptr(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset()));
3448         __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset()));
3449         __ call(RuntimeAddress(copyfunc_addr));
3450 #endif
3451 
3452 #endif
3453 
3454 #ifndef PRODUCT
3455         if (PrintC1Statistics) {
3456           Label failed;
3457           __ testl(rax, rax);
3458           __ jcc(Assembler::notZero, failed);
3459           __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt));
3460           __ bind(failed);
3461         }
3462 #endif
3463 
3464         __ testl(rax, rax);
3465         __ jcc(Assembler::zero, *stub->continuation());
3466 
3467 #ifndef PRODUCT
3468         if (PrintC1Statistics) {
3469           __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt));
3470         }
3471 #endif
3472 
3473         __ mov(tmp, rax);
3474 
3475         __ xorl(tmp, -1);
3476 
3477         // Restore previously spilled arguments
3478         __ movptr   (dst,     Address(rsp, 0*BytesPerWord));
3479         __ movptr   (dst_pos, Address(rsp, 1*BytesPerWord));
3480         __ movptr   (length,  Address(rsp, 2*BytesPerWord));
3481         __ movptr   (src_pos, Address(rsp, 3*BytesPerWord));
3482         __ movptr   (src,     Address(rsp, 4*BytesPerWord));
3483 
3484 
3485         __ subl(length, tmp);
3486         __ addl(src_pos, tmp);
3487         __ addl(dst_pos, tmp);
3488       }
3489 
3490       __ jmp(*stub->entry());
3491 
3492       __ bind(cont);
3493       __ pop(dst);
3494       __ pop(src);
3495     }
3496   }
3497 
3498 #ifdef ASSERT
3499   if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
3500     // Sanity check the known type with the incoming class.  For the
3501     // primitive case the types must match exactly, with src.klass and
3502     // dst.klass each exactly matching the default type.  For the
3503     // object array case, if no type check is needed then either the
3504     // dst type is exactly the expected type and the src type is a
3505     // subtype which we can't check or src is the same array as dst
3506     // but not necessarily exactly of type default_type.
3507     Label known_ok, halt;
3508     __ mov_metadata(tmp, default_type->constant_encoding());
3509 #ifdef _LP64
3510     if (UseCompressedClassPointers) {
3511       __ encode_klass_not_null(tmp);
3512     }
3513 #endif
3514 
3515     if (basic_type != T_OBJECT) {
3516 
3517       if (UseCompressedClassPointers) __ cmpl(tmp, dst_klass_addr);
3518       else                            __ cmpptr(tmp, dst_klass_addr);
3519       __ jcc(Assembler::notEqual, halt);
3520       if (UseCompressedClassPointers) __ cmpl(tmp, src_klass_addr);
3521       else                            __ cmpptr(tmp, src_klass_addr);
3522       __ jcc(Assembler::equal, known_ok);
3523     } else {
3524       if (UseCompressedClassPointers) __ cmpl(tmp, dst_klass_addr);
3525       else                            __ cmpptr(tmp, dst_klass_addr);
3526       __ jcc(Assembler::equal, known_ok);
3527       __ cmpptr(src, dst);
3528       __ jcc(Assembler::equal, known_ok);
3529     }
3530     __ bind(halt);
3531     __ stop("incorrect type information in arraycopy");
3532     __ bind(known_ok);
3533   }
3534 #endif
3535 
3536 #ifndef PRODUCT
3537   if (PrintC1Statistics) {
3538     __ incrementl(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)));
3539   }
3540 #endif
3541 
3542 #ifdef _LP64
3543   assert_different_registers(c_rarg0, dst, dst_pos, length);
3544   __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3545   assert_different_registers(c_rarg1, length);
3546   __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3547   __ mov(c_rarg2, length);
3548 
3549 #else
3550   __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3551   store_parameter(tmp, 0);
3552   __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3553   store_parameter(tmp, 1);
3554   store_parameter(length, 2);
3555 #endif // _LP64
3556 
3557   bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
3558   bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
3559   const char *name;
3560   address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
3561   __ call_VM_leaf(entry, 0);
3562 
3563   __ bind(*stub->continuation());
3564 }
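
     // Note on the stub return convention relied on twice above: the generic
     // and checkcast arraycopy stubs return 0 on complete success and, on a
     // partial copy, the bitwise complement of the number of elements already
     // copied. Hence the fix-up sequence (a sketch in C):
     //
     //   tmp = ~rax;          // elements already copied
     //   length  -= tmp;      // only the tail remains to be copied
     //   src_pos += tmp;
     //   dst_pos += tmp;      // then jump to the slow-path stub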
3565 
3566 void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
3567   assert(op->crc()->is_single_cpu(),  "crc must be register");
3568   assert(op->val()->is_single_cpu(),  "byte value must be register");
3569   assert(op->result_opr()->is_single_cpu(), "result must be register");
3570   Register crc = op->crc()->as_register();
3571   Register val = op->val()->as_register();
3572   Register res = op->result_opr()->as_register();
3573 
3574   assert_different_registers(val, crc, res);
3575 
3576   __ lea(res, ExternalAddress(StubRoutines::crc_table_addr()));
3577   __ notl(crc); // ~crc
3578   __ update_byte_crc32(crc, val, res);
3579   __ notl(crc); // ~crc
3580   __ mov(res, crc);
3581 }
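
     // Note: update_byte_crc32 performs the standard table-driven step for
     // the reflected CRC-32 polynomial, and the surrounding notl pair
     // supplies the pre- and post-inversion that java.util.zip.CRC32
     // specifies. A one-line sketch in C:
     //
     //   crc = crc_table[(crc ^ val) & 0xFF] ^ (crc >> 8);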
3582 
3583 void LIR_Assembler::emit_lock(LIR_OpLock* op) {
3584   Register obj = op->obj_opr()->as_register();  // may not be an oop
3585   Register hdr = op->hdr_opr()->as_register();
3586   Register lock = op->lock_opr()->as_register();
3587   if (!UseFastLocking) {
3588     __ jmp(*op->stub()->entry());
3589   } else if (op->code() == lir_lock) {
3590     Register scratch = noreg;
3591     if (UseBiasedLocking) {
3592       scratch = op->scratch_opr()->as_register();
3593     }
3594     assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
3595     // add debug info for NullPointerException only if one is possible
3596     int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry());
3597     if (op->info() != NULL) {
3598       add_debug_info_for_null_check(null_check_offset, op->info());
3599     }
3600     // done
3601   } else if (op->code() == lir_unlock) {
3602     assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
3603     __ unlock_object(hdr, obj, lock, *op->stub()->entry());
3604   } else {
3605     Unimplemented();
3606   }
3607   __ bind(*op->stub()->continuation());
3608 }
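
     // Note: a simplified sketch of the fast path that lock_object emits
     // (biased locking and the recursive-lock test omitted): the mark word is
     // displaced into the on-stack BasicLock, then cmpxchg tries to point the
     // object header at it:
     //
     //   lock->displaced_header = obj->mark;                // save mark
     //   if (!cmpxchg(&obj->mark, displaced_header, lock))  // install lock
     //     goto slow_path;                                  // contended case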
3609 
3610 
3611 void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
3612   ciMethod* method = op->profiled_method();
3613   int bci          = op->profiled_bci();
3614   ciMethod* callee = op->profiled_callee();
3615 
3616   // Update counter for all call types
3617   ciMethodData* md = method->method_data_or_null();
3618   assert(md != NULL, "Sanity");
3619   ciProfileData* data = md->bci_to_data(bci);
3620   assert(data->is_CounterData(), "need CounterData for calls");
3621   assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
3622   Register mdo  = op->mdo()->as_register();
3623   __ mov_metadata(mdo, md->constant_encoding());
3624   Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
3625   Bytecodes::Code bc = method->java_code_at_bci(bci);
3626   const bool callee_is_static = callee->is_loaded() && callee->is_static();
3627   // Perform additional virtual call profiling for invokevirtual and
3628   // invokeinterface bytecodes
3629   if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
3630       !callee_is_static &&  // required for optimized MH invokes
3631       C1ProfileVirtualCalls) {
3632     assert(op->recv()->is_single_cpu(), "recv must be allocated");
3633     Register recv = op->recv()->as_register();
3634     assert_different_registers(mdo, recv);
3635     assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
3636     ciKlass* known_klass = op->known_holder();
3637     if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
3638       // We know the type that will be seen at this call site; we can
3639       // statically update the MethodData* rather than needing to do
3640       // dynamic tests on the receiver type
3641 
3642       // NOTE: we should probably put a lock around this search to
3643       // avoid collisions by concurrent compilations
3644       ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
3645       uint i;
3646       for (i = 0; i < VirtualCallData::row_limit(); i++) {
3647         ciKlass* receiver = vc_data->receiver(i);
3648         if (known_klass->equals(receiver)) {
3649           Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
3650           __ addptr(data_addr, DataLayout::counter_increment);
3651           return;
3652         }
3653       }
3654 
3655       // Receiver type not found in profile data; select an empty slot
3656 
3657       // Note that this is less efficient than it should be because it
3658       // always does a write to the receiver part of the
3659       // VirtualCallData rather than just the first time
3660       for (i = 0; i < VirtualCallData::row_limit(); i++) {
3661         ciKlass* receiver = vc_data->receiver(i);
3662         if (receiver == NULL) {
3663           Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
3664           __ mov_metadata(recv_addr, known_klass->constant_encoding());
3665           Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
3666           __ addptr(data_addr, DataLayout::counter_increment);
3667           return;
3668         }
3669       }
3670     } else {
3671       __ load_klass(recv, recv);
3672       Label update_done;
3673       type_profile_helper(mdo, md, data, recv, &update_done);
3674       // Receiver did not match any saved receiver and there is no empty row for it.
3675       // Increment total counter to indicate polymorphic case.
3676       __ addptr(counter_addr, DataLayout::counter_increment);
3677 
3678       __ bind(update_done);
3679     }
3680   } else {
3681     // Static call
3682     __ addptr(counter_addr, DataLayout::counter_increment);
3683   }
3684 }
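
     // Note: the VirtualCallData updated above can be pictured as a small
     // fixed-size table of (receiver klass, count) rows in the MethodData:
     //
     //   row i: { receiver(i), receiver_count(i) }   // row_limit() rows
     //
     // A known or matching klass bumps its row's counter, a new klass claims
     // an empty row, and once all rows are taken only the shared counter at
     // count_offset() grows (the polymorphic case).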
3685 
3686 void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
3687   Register obj = op->obj()->as_register();
3688   Register tmp = op->tmp()->as_pointer_register();
3689   Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
3690   ciKlass* exact_klass = op->exact_klass();
3691   intptr_t current_klass = op->current_klass();
3692   bool not_null = op->not_null();
3693   bool no_conflict = op->no_conflict();
3694 
3695   Label update, next, none;
3696 
3697   bool do_null = !not_null;
3698   bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
3699   bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
3700 
3701   assert(do_null || do_update, "why are we here?");
3702   assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
3703 
3704   __ verify_oop(obj);
3705 
3706   if (tmp != obj) {
3707     __ mov(tmp, obj);
3708   }
3709   if (do_null) {
3710     __ testptr(tmp, tmp);
3711     __ jccb(Assembler::notZero, update);
3712     if (!TypeEntries::was_null_seen(current_klass)) {
3713       __ orptr(mdo_addr, TypeEntries::null_seen);
3714     }
3715     if (do_update) {
3716 #ifndef ASSERT
3717       __ jmpb(next);
3718     }
3719 #else
3720       __ jmp(next);
3721     }
3722   } else {
3723     __ testptr(tmp, tmp);
3724     __ jccb(Assembler::notZero, update);
3725     __ stop("unexpected null obj");
3726 #endif
3727   }
3728 
3729   __ bind(update);
3730 
3731   if (do_update) {
3732 #ifdef ASSERT
3733     if (exact_klass != NULL) {
3734       Label ok;
3735       __ load_klass(tmp, tmp);
3736       __ push(tmp);
3737       __ mov_metadata(tmp, exact_klass->constant_encoding());
3738       __ cmpptr(tmp, Address(rsp, 0));
3739       __ jccb(Assembler::equal, ok);
3740       __ stop("exact klass and actual klass differ");
3741       __ bind(ok);
3742       __ pop(tmp);
3743     }
3744 #endif
3745     if (!no_conflict) {
3746       if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
3747         if (exact_klass != NULL) {
3748           __ mov_metadata(tmp, exact_klass->constant_encoding());
3749         } else {
3750           __ load_klass(tmp, tmp);
3751         }
3752 
3753         __ xorptr(tmp, mdo_addr);
3754         __ testptr(tmp, TypeEntries::type_klass_mask);
3755         // klass seen before, nothing to do. The unknown bit may have been
3756         // set already but no need to check.
3757         __ jccb(Assembler::zero, next);
3758 
3759         __ testptr(tmp, TypeEntries::type_unknown);
3760         __ jccb(Assembler::notZero, next); // already unknown. Nothing to do anymore.
3761 
3762         if (TypeEntries::is_type_none(current_klass)) {
3763           __ cmpptr(mdo_addr, 0);
3764           __ jccb(Assembler::equal, none);
3765           __ cmpptr(mdo_addr, TypeEntries::null_seen);
3766           __ jccb(Assembler::equal, none);
3767           // There is a chance that the checks above (re-reading the
3768           // profile data from memory) fail if another thread has just set
3769           // the profile to this obj's klass
3770           __ xorptr(tmp, mdo_addr);
3771           __ testptr(tmp, TypeEntries::type_klass_mask);
3772           __ jccb(Assembler::zero, next);
3773         }
3774       } else {
3775         assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
3776                ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
3777 
3778         __ movptr(tmp, mdo_addr);
3779         __ testptr(tmp, TypeEntries::type_unknown);
3780         __ jccb(Assembler::notZero, next); // already unknown. Nothing to do anymore.
3781       }
3782 
3783       // Different from before: cannot keep an accurate profile.
3784       __ orptr(mdo_addr, TypeEntries::type_unknown);
3785 
3786       if (TypeEntries::is_type_none(current_klass)) {
3787         __ jmpb(next);
3788 
3789         __ bind(none);
3790         // first time here. Set profile type.
3791         __ movptr(mdo_addr, tmp);
3792       }
3793     } else {
3794       // There's a single possible klass at this profile point
3795       assert(exact_klass != NULL, "should be");
3796       if (TypeEntries::is_type_none(current_klass)) {
3797         __ mov_metadata(tmp, exact_klass->constant_encoding());
3798         __ xorptr(tmp, mdo_addr);
3799         __ testptr(tmp, TypeEntries::type_klass_mask);
3800 #ifdef ASSERT
3801         __ jcc(Assembler::zero, next);
3802 
3803         {
3804           Label ok;
3805           __ push(tmp);
3806           __ cmpptr(mdo_addr, 0);
3807           __ jcc(Assembler::equal, ok);
3808           __ cmpptr(mdo_addr, TypeEntries::null_seen);
3809           __ jcc(Assembler::equal, ok);
3810           // may have been set by another thread
3811           __ mov_metadata(tmp, exact_klass->constant_encoding());
3812           __ xorptr(tmp, mdo_addr);
3813           __ testptr(tmp, TypeEntries::type_mask);
3814           __ jcc(Assembler::zero, ok);
3815 
3816           __ stop("unexpected profiling mismatch");
3817           __ bind(ok);
3818           __ pop(tmp);
3819         }
3820 #else
3821         __ jccb(Assembler::zero, next);
3822 #endif
3823         // first time here. Set profile type.
3824         __ movptr(mdo_addr, tmp);
3825       } else {
3826         assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
3827                ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
3828 
3829         __ movptr(tmp, mdo_addr);
3830         __ testptr(tmp, TypeEntries::type_unknown);
3831         __ jccb(Assembler::notZero, next); // already unknown. Nothing to do anymore.
3832 
3833         __ orptr(mdo_addr, TypeEntries::type_unknown);
3834       }
3835     }
3836 
3837     __ bind(next);
3838   }
3839 }
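
     // Note: the profile slot at mdo_addr packs a Klass* and flag bits into
     // a single pointer-sized word, which is why the code above compares via
     // xorptr/testptr rather than plain equality. A sketch of the layout:
     //
     //   [ klass bits (type_klass_mask) | type_unknown | null_seen ]
     //
     //   ((Klass* ^ word) & type_klass_mask) == 0  // same klass already seen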
3840 
3841 void LIR_Assembler::emit_delay(LIR_OpDelay*) {
3842   Unimplemented();
3843 }
3844 
3845 
3846 void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
3847   __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
3848 }
3849 
3850 
3851 void LIR_Assembler::align_backward_branch_target() {
3852   __ align(BytesPerWord);
3853 }
3854 
3855 
3856 void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
3857   if (left->is_single_cpu()) {
3858     __ negl(left->as_register());
3859     move_regs(left->as_register(), dest->as_register());
3860 
3861   } else if (left->is_double_cpu()) {
3862     Register lo = left->as_register_lo();
3863 #ifdef _LP64
3864     Register dst = dest->as_register_lo();
3865     __ movptr(dst, lo);
3866     __ negptr(dst);
3867 #else
3868     Register hi = left->as_register_hi();
3869     __ lneg(hi, lo);
3870     if (dest->as_register_lo() == hi) {
3871       assert(dest->as_register_hi() != lo, "destroying register");
3872       move_regs(hi, dest->as_register_hi());
3873       move_regs(lo, dest->as_register_lo());
3874     } else {
3875       move_regs(lo, dest->as_register_lo());
3876       move_regs(hi, dest->as_register_hi());
3877     }
3878 #endif // _LP64
3879 
3880   } else if (dest->is_single_xmm()) {
3881     if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
3882       __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
3883     }
3884     __ xorps(dest->as_xmm_float_reg(),
3885              ExternalAddress((address)float_signflip_pool));
3886 
3887   } else if (dest->is_double_xmm()) {
3888     if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
3889       __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
3890     }
3891     __ xorpd(dest->as_xmm_double_reg(),
3892              ExternalAddress((address)double_signflip_pool));
3893 
3894   } else if (left->is_single_fpu() || left->is_double_fpu()) {
3895     assert(left->fpu() == 0, "arg must be on TOS");
3896     assert(dest->fpu() == 0, "dest must be TOS");
3897     __ fchs();
3898 
3899   } else {
3900     ShouldNotReachHere();
3901   }
3902 }
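
     // Note: on 32-bit, lneg negates the hi:lo register pair as one 64-bit
     // value using the usual borrow-propagating idiom (a sketch):
     //
     //   negl lo          // lo = -lo, carry set if lo was non-zero
     //   adcl hi, 0       // fold the borrow into hi
     //   negl hi          // hi = -(hi + borrow)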
3903 
3904 
3905 void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) {
3906   assert(addr->is_address() && dest->is_register(), "check");
3907   // dest must hold the address, so use its pointer-sized register
3908   Register reg = dest->as_pointer_register();
3909   __ lea(reg, as_Address(addr->as_address_ptr()));
3910 }
3911 
3912 
3913 
3914 void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
3915   assert(!tmp->is_valid(), "don't need temporary");
3916   __ call(RuntimeAddress(dest));
3917   if (info != NULL) {
3918     add_call_info_here(info);
3919   }
3920 }
3921 
3922 
3923 void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
3924   assert(type == T_LONG, "only for volatile long fields");
3925 
3926   if (info != NULL) {
3927     add_debug_info_for_null_check_here(info);
3928   }
3929 
3930   if (src->is_double_xmm()) {
3931     if (dest->is_double_cpu()) {
3932 #ifdef _LP64
3933       __ movdq(dest->as_register_lo(), src->as_xmm_double_reg());
3934 #else
3935       __ movdl(dest->as_register_lo(), src->as_xmm_double_reg());
3936       __ psrlq(src->as_xmm_double_reg(), 32);
3937       __ movdl(dest->as_register_hi(), src->as_xmm_double_reg());
3938 #endif // _LP64
3939     } else if (dest->is_double_stack()) {
3940       __ movdbl(frame_map()->address_for_slot(dest->double_stack_ix()), src->as_xmm_double_reg());
3941     } else if (dest->is_address()) {
3942       __ movdbl(as_Address(dest->as_address_ptr()), src->as_xmm_double_reg());
3943     } else {
3944       ShouldNotReachHere();
3945     }
3946 
3947   } else if (dest->is_double_xmm()) {
3948     if (src->is_double_stack()) {
3949       __ movdbl(dest->as_xmm_double_reg(), frame_map()->address_for_slot(src->double_stack_ix()));
3950     } else if (src->is_address()) {
3951       __ movdbl(dest->as_xmm_double_reg(), as_Address(src->as_address_ptr()));
3952     } else {
3953       ShouldNotReachHere();
3954     }
3955 
3956   } else if (src->is_double_fpu()) {
3957     assert(src->fpu_regnrLo() == 0, "must be TOS");
3958     if (dest->is_double_stack()) {
3959       __ fistp_d(frame_map()->address_for_slot(dest->double_stack_ix()));
3960     } else if (dest->is_address()) {
3961       __ fistp_d(as_Address(dest->as_address_ptr()));
3962     } else {
3963       ShouldNotReachHere();
3964     }
3965 
3966   } else if (dest->is_double_fpu()) {
3967     assert(dest->fpu_regnrLo() == 0, "must be TOS");
3968     if (src->is_double_stack()) {
3969       __ fild_d(frame_map()->address_for_slot(src->double_stack_ix()));
3970     } else if (src->is_address()) {
3971       __ fild_d(as_Address(src->as_address_ptr()));
3972     } else {
3973       ShouldNotReachHere();
3974     }
3975   } else {
3976     ShouldNotReachHere();
3977   }
3978 }
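
     // Note: the XMM and FPU round-trips above exist because a volatile
     // Java long must be accessed atomically, and 32-bit integer code would
     // need two separate moves. A single naturally aligned 64-bit wide
     // load/store is atomic on x86, so on 32-bit the value travels through
     // the FPU:
     //
     //   fild_d  [src]      // one 64-bit load onto the FPU stack
     //   fistp_d [dest]     // one 64-bit store, popping the stack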
3979 
3980 #ifdef ASSERT
3981 // emit run-time assertion
3982 void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
3983   assert(op->code() == lir_assert, "must be");
3984 
3985   if (op->in_opr1()->is_valid()) {
3986     assert(op->in_opr2()->is_valid(), "both operands must be valid");
3987     comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op);
3988   } else {
3989     assert(op->in_opr2()->is_illegal(), "both operands must be illegal");
3990     assert(op->condition() == lir_cond_always, "no other conditions allowed");
3991   }
3992 
3993   Label ok;
3994   if (op->condition() != lir_cond_always) {
3995     Assembler::Condition acond = Assembler::zero;
3996     switch (op->condition()) {
3997       case lir_cond_equal:        acond = Assembler::equal;       break;
3998       case lir_cond_notEqual:     acond = Assembler::notEqual;    break;
3999       case lir_cond_less:         acond = Assembler::less;        break;
4000       case lir_cond_lessEqual:    acond = Assembler::lessEqual;   break;
4001       case lir_cond_greaterEqual: acond = Assembler::greaterEqual;break;
4002       case lir_cond_greater:      acond = Assembler::greater;     break;
4003       case lir_cond_belowEqual:   acond = Assembler::belowEqual;  break;
4004       case lir_cond_aboveEqual:   acond = Assembler::aboveEqual;  break;
4005       default:                    ShouldNotReachHere();
4006     }
4007     __ jcc(acond, ok);
4008   }
4009   if (op->halt()) {
4010     const char* str = __ code_string(op->msg());
4011     __ stop(str);
4012   } else {
4013     breakpoint();
4014   }
4015   __ bind(ok);
4016 }
4017 #endif
4018 
4019 void LIR_Assembler::membar() {
4020   // QQQ sparc TSO uses this
4021   __ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad));
4022 }
4023 
4024 void LIR_Assembler::membar_acquire() {
4025   // No x86 machines currently require load fences
4026 }
4027 
4028 void LIR_Assembler::membar_release() {
4029   // No x86 machines currently require store fences
4030 }
4031 
4032 void LIR_Assembler::membar_loadload() {
4033   // no-op
4034   //__ membar(Assembler::Membar_mask_bits(Assembler::loadload));
4035 }
4036 
4037 void LIR_Assembler::membar_storestore() {
4038   // no-op
4039   //__ membar(Assembler::Membar_mask_bits(Assembler::storestore));
4040 }
4041 
4042 void LIR_Assembler::membar_loadstore() {
4043   // no-op
4044   //__ membar(Assembler::Membar_mask_bits(Assembler::loadstore));
4045 }
4046 
4047 void LIR_Assembler::membar_storeload() {
4048   __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
4049 }
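
     // Note: x86 provides a strong (TSO-like) memory model in which only
     // store-load reordering is observable, so all the barriers above except
     // StoreLoad can be no-ops. The StoreLoad fence itself is commonly
     // emitted as a locked read-modify-write of the stack top rather than
     // mfence, e.g.:
     //
     //   lock addl $0, (%esp)   // orders prior stores before later loads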
4050 
4051 void LIR_Assembler::get_thread(LIR_Opr result_reg) {
4052   assert(result_reg->is_register(), "check");
4053 #ifdef _LP64
4054   // __ get_thread(result_reg->as_register_lo());
4055   __ mov(result_reg->as_register(), r15_thread);
4056 #else
4057   __ get_thread(result_reg->as_register());
4058 #endif // _LP64
4059 }
4060 
4061 
4062 void LIR_Assembler::peephole(LIR_List*) {
4063   // do nothing for now
4064 }
4065 
4066 void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) {
4067   assert(data == dest, "xchg/xadd uses only 2 operands");
4068 
4069   if (data->type() == T_INT) {
4070     if (code == lir_xadd) {
4071       if (os::is_MP()) {
4072         __ lock();
4073       }
4074       __ xaddl(as_Address(src->as_address_ptr()), data->as_register());
4075     } else {
4076       __ xchgl(data->as_register(), as_Address(src->as_address_ptr()));
4077     }
4078   } else if (data->is_oop()) {
4079     assert (code == lir_xchg, "xadd for oops");
4080     Register obj = data->as_register();
4081 #ifdef _LP64
4082     if (UseCompressedOops) {
4083       __ encode_heap_oop(obj);
4084       __ xchgl(obj, as_Address(src->as_address_ptr()));
4085       __ decode_heap_oop(obj);
4086     } else {
4087       __ xchgptr(obj, as_Address(src->as_address_ptr()));
4088     }
4089 #else
4090     __ xchgl(obj, as_Address(src->as_address_ptr()));
4091 #endif
4092   } else if (data->type() == T_LONG) {
4093 #ifdef _LP64
4094     assert(data->as_register_lo() == data->as_register_hi(), "should be a single register");
4095     if (code == lir_xadd) {
4096       if (os::is_MP()) {
4097         __ lock();
4098       }
4099       __ xaddq(as_Address(src->as_address_ptr()), data->as_register_lo());
4100     } else {
4101       __ xchgq(data->as_register_lo(), as_Address(src->as_address_ptr()));
4102     }
4103 #else
4104     ShouldNotReachHere();
4105 #endif
4106   } else {
4107     ShouldNotReachHere();
4108   }
4109 }
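
     // Note: the asymmetry above is deliberate: XCHG with a memory operand
     // is implicitly locked on x86, while XADD needs an explicit lock prefix
     // to be atomic, hence the os::is_MP() guarded __ lock() only on the
     // lir_xadd paths. In AT&T syntax:
     //
     //   lock xaddl %reg, (addr)   // atomic fetch-and-add
     //        xchgl %reg, (addr)   // atomic swap; lock is implicit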
4110 
4111 #undef __