/*
 * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "c1/c1_Compilation.hpp"
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "c1/c1_Runtime1.hpp"
#include "c1/c1_ValueStack.hpp"
#include "ci/ciArrayKlass.hpp"
#include "ci/ciInstance.hpp"
#include "gc_interface/collectedHeap.hpp"
#include "memory/barrierSet.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "nativeInst_x86.hpp"
#include "oops/objArrayKlass.hpp"
#include "runtime/sharedRuntime.hpp"


// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((intptr_t)adr) & ((intptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}
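
// Alignment sketch (illustrative addresses): masking with ~0xF rounds the
// pointer down by up to 15 bytes, e.g. an adr of 0x1008 yields the operand
// address 0x1000. The pool below reserves one extra 128-bit entry in front
// of the data, so the rounded-down address always stays inside the buffer.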

// Buffer for 128-bit masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128 bits (data) + 128 bits (alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
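
// How these masks are consumed (a sketch; the actual uses appear in the
// arithmetic lowering later in this file): AbsF/AbsD AND the value with a
// signmask to clear the sign bit, NegF/NegD XOR it with a signflip mask to
// toggle it, e.g.
//   __ andps(dest, ExternalAddress((address)float_signmask_pool));  // AbsF
//   __ xorps(dest, ExternalAddress((address)float_signflip_pool));  // NegF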



NEEDS_CLEANUP // remove these definitions?
const Register IC_Klass    = rax;   // where the IC klass is cached
const Register SYNC_header = rax;   // synchronization header
const Register SHIFT_count = rcx;   // where count for shift operations must be

#define __ _masm->


static void select_different_registers(Register preserve,
                                       Register extra,
                                       Register &tmp1,
                                       Register &tmp2) {
  if (tmp1 == preserve) {
    assert_different_registers(tmp1, tmp2, extra);
    tmp1 = extra;
  } else if (tmp2 == preserve) {
    assert_different_registers(tmp1, tmp2, extra);
    tmp2 = extra;
  }
  assert_different_registers(preserve, tmp1, tmp2);
}



static void select_different_registers(Register preserve,
                                       Register extra,
                                       Register &tmp1,
                                       Register &tmp2,
                                       Register &tmp3) {
  if (tmp1 == preserve) {
    assert_different_registers(tmp1, tmp2, tmp3, extra);
    tmp1 = extra;
  } else if (tmp2 == preserve) {
    assert_different_registers(tmp1, tmp2, tmp3, extra);
    tmp2 = extra;
  } else if (tmp3 == preserve) {
    assert_different_registers(tmp1, tmp2, tmp3, extra);
    tmp3 = extra;
  }
  assert_different_registers(preserve, tmp1, tmp2, tmp3);
}
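
// Usage sketch (hypothetical registers): if the allocator handed out
// tmp1 == rax while 'preserve' is also rax, the call redirects tmp1 to the
// caller-supplied spare without disturbing 'preserve':
//   Register tmp1 = rax, tmp2 = rbx;
//   select_different_registers(/*preserve*/ rax, /*extra*/ rcx, tmp1, tmp2);
//   // now tmp1 == rcx, tmp2 == rbx, and rax is untouched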



bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
  if (opr->is_constant()) {
    LIR_Const* constant = opr->as_constant_ptr();
    switch (constant->type()) {
      case T_INT: {
        return true;
      }

      default:
        return false;
    }
  }
  return false;
}


LIR_Opr LIR_Assembler::receiverOpr() {
  return FrameMap::receiver_opr;
}

LIR_Opr LIR_Assembler::incomingReceiverOpr() {
  return receiverOpr();
}

LIR_Opr LIR_Assembler::osrBufferPointer() {
  return FrameMap::as_pointer_opr(receiverOpr()->as_register());
}

//--------------fpu register translations-----------------------


address LIR_Assembler::float_constant(float f) {
  address const_addr = __ float_constant(f);
  if (const_addr == NULL) {
    bailout("const section overflow");
    return __ code()->consts()->start();
  } else {
    return const_addr;
  }
}


address LIR_Assembler::double_constant(double d) {
  address const_addr = __ double_constant(d);
  if (const_addr == NULL) {
    bailout("const section overflow");
    return __ code()->consts()->start();
  } else {
    return const_addr;
  }
}


void LIR_Assembler::set_24bit_FPU() {
  __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
}

void LIR_Assembler::reset_FPU() {
  __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
}

void LIR_Assembler::fpop() {
  __ fpop();
}

void LIR_Assembler::fxch(int i) {
  __ fxch(i);
}

void LIR_Assembler::fld(int i) {
  __ fld_s(i);
}

void LIR_Assembler::ffree(int i) {
  __ ffree(i);
}

void LIR_Assembler::breakpoint() {
  __ int3();
}

void LIR_Assembler::push(LIR_Opr opr) {
  if (opr->is_single_cpu()) {
    __ push_reg(opr->as_register());
  } else if (opr->is_double_cpu()) {
    NOT_LP64(__ push_reg(opr->as_register_hi()));
    __ push_reg(opr->as_register_lo());
  } else if (opr->is_stack()) {
    __ push_addr(frame_map()->address_for_slot(opr->single_stack_ix()));
  } else if (opr->is_constant()) {
    LIR_Const* const_opr = opr->as_constant_ptr();
    if (const_opr->type() == T_OBJECT) {
      __ push_oop(const_opr->as_jobject());
    } else if (const_opr->type() == T_INT) {
      __ push_jint(const_opr->as_jint());
    } else {
      ShouldNotReachHere();
    }

  } else {
    ShouldNotReachHere();
  }
}

void LIR_Assembler::pop(LIR_Opr opr) {
  if (opr->is_single_cpu()) {
    __ pop_reg(opr->as_register());
  } else {
    ShouldNotReachHere();
  }
}

bool LIR_Assembler::is_literal_address(LIR_Address* addr) {
  return addr->base()->is_illegal() && addr->index()->is_illegal();
}

//-------------------------------------------

Address LIR_Assembler::as_Address(LIR_Address* addr) {
  return as_Address(addr, rscratch1);
}

Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) {
  if (addr->base()->is_illegal()) {
    assert(addr->index()->is_illegal(), "must be illegal too");
    AddressLiteral laddr((address)addr->disp(), relocInfo::none);
    if (! __ reachable(laddr)) {
      __ movptr(tmp, laddr.addr());
      Address res(tmp, 0);
      return res;
    } else {
      return __ as_Address(laddr);
    }
  }

  Register base = addr->base()->as_pointer_register();

  if (addr->index()->is_illegal()) {
    return Address( base, addr->disp());
  } else if (addr->index()->is_cpu_register()) {
    Register index = addr->index()->as_pointer_register();
    return Address(base, index, (Address::ScaleFactor) addr->scale(), addr->disp());
  } else if (addr->index()->is_constant()) {
    intptr_t addr_offset = (addr->index()->as_constant_ptr()->as_jint() << addr->scale()) + addr->disp();
    assert(Assembler::is_simm32(addr_offset), "must be");

    return Address(base, addr_offset);
  } else {
    Unimplemented();
    return Address();
  }
}


Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
  Address base = as_Address(addr);
  return Address(base._base, base._index, base._scale, base._disp + BytesPerWord);
}


Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
  return as_Address(addr);
}

void LIR_Assembler::osr_entry() {
  offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
  BlockBegin* osr_entry = compilation()->hir()->osr_entry();
  ValueStack* entry_state = osr_entry->state();
  int number_of_locks = entry_state->locks_size();

  // we jump here if osr happens with the interpreter
  // state set up to continue at the beginning of the
  // loop that triggered osr - in particular, we have
  // the following registers set up:
  //
  // rcx: osr buffer
  //

  // build frame
  ciMethod* m = compilation()->method();
  __ build_frame(initial_frame_size_in_bytes());

  // OSR buffer is
  //
  // locals[nlocals-1..0]
  // monitors[number_of_locks-1..0]
  //
  // locals is a direct copy of the interpreter frame, so the first slot
  // in the locals array is the last local from the interpreter
  // and the last slot is local[0] (the receiver) from the interpreter
  //
  // Similarly with locks. The first lock slot in the osr buffer is the nth lock
  // from the interpreter frame, and the nth lock slot in the osr buffer is the
  // 0th lock in the interpreter frame (the method lock if a sync method)

  // Initialize monitors in the compiled activation.
  //   rcx: pointer to osr buffer
  //
  // All other registers are dead at this point and the locals will be
  // copied into place by code emitted in the IR.

  Register OSR_buf = osrBufferPointer()->as_pointer_register();
  { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
    int monitor_offset = BytesPerWord * method()->max_locals() +
      (2 * BytesPerWord) * (number_of_locks - 1);
    // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
    // the OSR buffer using 2 word entries: first the lock and then
    // the oop.
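    // Worked example (hypothetical sizes): with max_locals == 3 and
    // number_of_locks == 2 on a 64-bit VM (BytesPerWord == 8),
    // monitor_offset == 8*3 + 16*1 == 40, so monitor 0 lives at buffer
    // offsets 40 (lock) and 48 (oop), and monitor 1 at 24 and 32.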
    for (int i = 0; i < number_of_locks; i++) {
      int slot_offset = monitor_offset - ((i * 2) * BytesPerWord);
#ifdef ASSERT
      // verify the interpreter's monitor has a non-null object
      {
        Label L;
        __ cmpptr(Address(OSR_buf, slot_offset + 1*BytesPerWord), (int32_t)NULL_WORD);
        __ jcc(Assembler::notZero, L);
        __ stop("locked object is NULL");
        __ bind(L);
      }
#endif
      __ movptr(rbx, Address(OSR_buf, slot_offset + 0));
      __ movptr(frame_map()->address_for_monitor_lock(i), rbx);
      __ movptr(rbx, Address(OSR_buf, slot_offset + 1*BytesPerWord));
      __ movptr(frame_map()->address_for_monitor_object(i), rbx);
    }
  }
}


// inline cache check; done before the frame is built.
int LIR_Assembler::check_icache() {
  Register receiver = FrameMap::receiver_opr->as_register();
  Register ic_klass = IC_Klass;
  const int ic_cmp_size = LP64_ONLY(10) NOT_LP64(9);
  const bool do_post_padding = VerifyOops || UseCompressedOops;
  if (!do_post_padding) {
    // insert some nops so that the verified entry point is aligned on CodeEntryAlignment
    while ((__ offset() + ic_cmp_size) % CodeEntryAlignment != 0) {
      __ nop();
    }
  }
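  // Padding sketch (illustrative numbers): with CodeEntryAlignment == 32 and
  // ic_cmp_size == 10, a current offset of 50 receives 4 nops; the inline
  // cache check then starts at 54 and the verified entry point immediately
  // after it falls on offset 64, a CodeEntryAlignment boundary.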
  int offset = __ offset();
  __ inline_cache_check(receiver, IC_Klass);
  assert(__ offset() % CodeEntryAlignment == 0 || do_post_padding, "alignment must be correct");
  if (do_post_padding) {
    // force alignment after the cache check.
    // It's been verified to be aligned if !VerifyOops
    __ align(CodeEntryAlignment);
  }
  return offset;
}


void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo* info) {
  jobject o = NULL;
  PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id);
  __ movoop(reg, o);
  patching_epilog(patch, lir_patch_normal, reg, info);
}


void LIR_Assembler::monitorexit(LIR_Opr obj_opr, LIR_Opr lock_opr, Register new_hdr, int monitor_no, Register exception) {
  if (exception->is_valid()) {
    // preserve exception
    // note: the monitor_exit runtime call is a leaf routine
    //       and cannot block => no GC can happen
    // The slow case (MonitorAccessStub) uses the first two stack slots
    // ([esp+0] and [esp+4]), therefore we store the exception at [esp+8]
    __ movptr (Address(rsp, 2*wordSize), exception);
  }

  Register obj_reg  = obj_opr->as_register();
  Register lock_reg = lock_opr->as_register();

  // setup registers (lock_reg must be rax for lock_object)
  assert(obj_reg != SYNC_header && lock_reg != SYNC_header, "rax must be available here");
  Register hdr = lock_reg;
  assert(new_hdr == SYNC_header, "wrong register");
  lock_reg = new_hdr;
  // compute pointer to BasicLock
  Address lock_addr = frame_map()->address_for_monitor_lock(monitor_no);
  __ lea(lock_reg, lock_addr);
  // unlock object
  MonitorAccessStub* slow_case = new MonitorExitStub(lock_opr, true, monitor_no);
  // _slow_case_stubs->append(slow_case);
  // temporary fix: must be created after exception handler, therefore as call stub
  _slow_case_stubs->append(slow_case);
  if (UseFastLocking) {
    // try inlined fast unlocking first, revert to slow unlocking if it fails
    // note: lock_reg points to the displaced header since the displaced header offset is 0!
    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
    __ unlock_object(hdr, obj_reg, lock_reg, *slow_case->entry());
  } else {
    // always do slow unlocking
    // note: the slow unlocking code could be inlined here, however if we use
    //       slow unlocking, speed doesn't matter anyway and this solution is
    //       simpler and requires less duplicated code - additionally, the
    //       slow unlocking code is the same in either case which simplifies
    //       debugging
    __ jmp(*slow_case->entry());
  }
  // done
  __ bind(*slow_case->continuation());

  if (exception->is_valid()) {
    // restore exception
    __ movptr (exception, Address(rsp, 2 * wordSize));
  }
}

// This specifies the rsp decrement needed to build the frame
int LIR_Assembler::initial_frame_size_in_bytes() {
  // if rounding, must let FrameMap know!

  // The frame_map records size in slots (32-bit words)

  // subtract two words to account for return address and link
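  // Illustrative arithmetic: on LP64, slots_per_word == 2 and
  // stack_slot_size == 4, so a framesize of 16 slots yields
  // (16 - 4) * 4 == 48 bytes of rsp decrement.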
  return (frame_map()->framesize() - (2*VMRegImpl::slots_per_word))  * VMRegImpl::stack_slot_size;
}


int LIR_Assembler::emit_exception_handler() {
  // if the last instruction is a call (typically to do a throw which
  // is coming at the end after block reordering) the return address
  // must still point into the code area in order to avoid assertion
  // failures when searching for the corresponding bci => add a nop
  // (was bug 5/14/1999 - gri)
  __ nop();

  // generate code for exception handler
  address handler_base = __ start_a_stub(exception_handler_size);
  if (handler_base == NULL) {
    // not enough space left for the handler
    bailout("exception handler overflow");
    return -1;
  }

  int offset = code_offset();

  // the exception oop and pc are in rax and rdx
  // no other registers need to be preserved, so invalidate them
  __ invalidate_registers(false, true, true, false, true, true);

  // check that there is really an exception
  __ verify_not_null_oop(rax);

  // search an exception handler (rax: exception oop, rdx: throwing pc)
  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
  __ should_not_reach_here();
  assert(code_offset() - offset <= exception_handler_size, "overflow");
  __ end_a_stub();

  return offset;
}


// Emit the code to remove the frame from the stack in the exception
// unwind path.
int LIR_Assembler::emit_unwind_handler() {
#ifndef PRODUCT
  if (CommentedAssembly) {
    _masm->block_comment("Unwind handler");
  }
#endif

  int offset = code_offset();

  // Fetch the exception from TLS and clear out exception-related thread state
  __ get_thread(rsi);
  __ movptr(rax, Address(rsi, JavaThread::exception_oop_offset()));
  __ movptr(Address(rsi, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
  __ movptr(Address(rsi, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);

  __ bind(_unwind_handler_entry);
  __ verify_not_null_oop(rax);
  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
    __ mov(rsi, rax);  // Preserve the exception
  }

  // Perform needed unlocking
  MonitorExitStub* stub = NULL;
  if (method()->is_synchronized()) {
    monitor_address(0, FrameMap::rax_opr);
    stub = new MonitorExitStub(FrameMap::rax_opr, true, 0);
    __ unlock_object(rdi, rbx, rax, *stub->entry());
    __ bind(*stub->continuation());
  }

  if (compilation()->env()->dtrace_method_probes()) {
    __ get_thread(rax);
    __ movptr(Address(rsp, 0), rax);
    __ movoop(Address(rsp, sizeof(void*)), method()->constant_encoding());
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit)));
  }

  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
    __ mov(rax, rsi);  // Restore the exception
  }

  // remove the activation and dispatch to the unwind handler
  __ remove_frame(initial_frame_size_in_bytes());
  __ jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));

  // Emit the slow path assembly
  if (stub != NULL) {
    stub->emit_code(this);
  }

  return offset;
}


int LIR_Assembler::emit_deopt_handler() {
  // if the last instruction is a call (typically to do a throw which
  // is coming at the end after block reordering) the return address
  // must still point into the code area in order to avoid assertion
  // failures when searching for the corresponding bci => add a nop
  // (was bug 5/14/1999 - gri)
  __ nop();

  // generate code for deopt handler
  address handler_base = __ start_a_stub(deopt_handler_size);
  if (handler_base == NULL) {
    // not enough space left for the handler
    bailout("deopt handler overflow");
    return -1;
  }

  int offset = code_offset();
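  // Descriptive note: the push/jump pair below is the usual deopt-handler
  // shape. Pushing the handler's own pc gives the deopt blob a return
  // address inside this nmethod, identifying the deopt site, before control
  // transfers to the unpack code.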
  InternalAddress here(__ pc());

  __ pushptr(here.addr());
  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(code_offset() - offset <= deopt_handler_size, "overflow");
  __ end_a_stub();

  return offset;
}


// This is the fast version of java.lang.String.compare; it has no
// OSR entry and therefore we generate a slow version for OSRs
void LIR_Assembler::emit_string_compare(LIR_Opr arg0, LIR_Opr arg1, LIR_Opr dst, CodeEmitInfo* info) {
  __ movptr (rbx, rcx); // receiver is in rcx
  __ movptr (rax, arg1->as_register());

  // Get addresses of first characters from both Strings
  __ load_heap_oop(rsi, Address(rax, java_lang_String::value_offset_in_bytes()));
  __ movptr       (rcx, Address(rax, java_lang_String::offset_offset_in_bytes()));
  __ lea          (rsi, Address(rsi, rcx, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));


  // rbx may be NULL
  add_debug_info_for_null_check_here(info);
  __ load_heap_oop(rdi, Address(rbx, java_lang_String::value_offset_in_bytes()));
  __ movptr       (rcx, Address(rbx, java_lang_String::offset_offset_in_bytes()));
  __ lea          (rdi, Address(rdi, rcx, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));

  // compute minimum length (in rax) and difference of lengths (on top of stack)
  __ movl  (rbx, Address(rbx, java_lang_String::count_offset_in_bytes()));
  __ movl  (rax, Address(rax, java_lang_String::count_offset_in_bytes()));
  __ mov   (rcx, rbx);
  __ subptr(rbx, rax); // subtract lengths
  __ push  (rbx);      // result
  __ cmov  (Assembler::lessEqual, rax, rcx);

  // is minimum length 0?
  Label noLoop, haveResult;
  __ testptr (rax, rax);
  __ jcc (Assembler::zero, noLoop);

  // compare first characters
  __ load_unsigned_short(rcx, Address(rdi, 0));
  __ load_unsigned_short(rbx, Address(rsi, 0));
  __ subl(rcx, rbx);
  __ jcc(Assembler::notZero, haveResult);
  // starting loop
  __ decrement(rax); // we already tested index: skip one
  __ jcc(Assembler::zero, noLoop);

  // set rsi/rdi to the end of the arrays (arrays have same length)
  // negate the index
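  // (Descriptive note: the leas below position rsi/rdi just past the region
  // being compared; with rax negated to -(min_length-1), the loads at
  // [rsi/rdi + rax*2] walk characters 1 .. min_length-1 as rax counts up
  // to 0; character 0 was already compared above.)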

  __ lea(rsi, Address(rsi, rax, Address::times_2, type2aelembytes(T_CHAR)));
  __ lea(rdi, Address(rdi, rax, Address::times_2, type2aelembytes(T_CHAR)));
  __ negptr(rax);

  // compare the strings in a loop

  Label loop;
  __ align(wordSize);
  __ bind(loop);
  __ load_unsigned_short(rcx, Address(rdi, rax, Address::times_2, 0));
  __ load_unsigned_short(rbx, Address(rsi, rax, Address::times_2, 0));
  __ subl(rcx, rbx);
  __ jcc(Assembler::notZero, haveResult);
  __ increment(rax);
  __ jcc(Assembler::notZero, loop);

  // strings are equal up to min length

  __ bind(noLoop);
  __ pop(rax);
  return_op(LIR_OprFact::illegalOpr);

  __ bind(haveResult);
  // leave instruction is going to discard the TOS value
  __ mov (rax, rcx); // result of call is in rax
}


void LIR_Assembler::return_op(LIR_Opr result) {
  assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == rax, "word returns are in rax");
  if (!result->is_illegal() && result->is_float_kind() && !result->is_xmm_register()) {
    assert(result->fpu() == 0, "result must already be on TOS");
  }

  // Pop the stack before the safepoint code
  __ remove_frame(initial_frame_size_in_bytes());

  bool result_is_oop = result->is_valid() ? result->is_oop() : false;

  // Note: we do not need to round double result; float result has the right precision
  // the poll sets the condition code, but no data registers
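  // (How the poll works, for reference: the testl below reads the polling
  // page. When the VM wants a safepoint it protects that page, the read
  // faults, and the signal handler maps the faulting pc back to this
  // poll_return_type relocation to bring the thread to a stop.)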
  AddressLiteral polling_page(os::get_polling_page() + (SafepointPollOffset % os::vm_page_size()),
                              relocInfo::poll_return_type);

  if (Assembler::is_polling_page_far()) {
    __ lea(rscratch1, polling_page);
    __ relocate(relocInfo::poll_return_type);
    __ testl(rax, Address(rscratch1, 0));
  } else {
    __ testl(rax, polling_page);
  }
  __ ret(0);
}


int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
  AddressLiteral polling_page(os::get_polling_page() + (SafepointPollOffset % os::vm_page_size()),
                              relocInfo::poll_type);
  guarantee(info != NULL, "Shouldn't be NULL");
  int offset = __ offset();
  if (Assembler::is_polling_page_far()) {
    __ lea(rscratch1, polling_page);
    offset = __ offset();
    add_debug_info_for_branch(info);
    __ testl(rax, Address(rscratch1, 0));
  } else {
    add_debug_info_for_branch(info);
    __ testl(rax, polling_page);
  }
  return offset;
}


void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
  if (from_reg != to_reg) __ mov(to_reg, from_reg);
}

void LIR_Assembler::swap_reg(Register a, Register b) {
  __ xchgptr(a, b);
}


void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
  assert(src->is_constant(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");
  LIR_Const* c = src->as_constant_ptr();

  switch (c->type()) {
    case T_INT: {
      assert(patch_code == lir_patch_none, "no patching handled here");
      __ movl(dest->as_register(), c->as_jint());
      break;
    }

    case T_ADDRESS: {
      assert(patch_code == lir_patch_none, "no patching handled here");
      __ movptr(dest->as_register(), c->as_jint());
      break;
    }

    case T_LONG: {
      assert(patch_code == lir_patch_none, "no patching handled here");
#ifdef _LP64
      __ movptr(dest->as_register_lo(), (intptr_t)c->as_jlong());
#else
      __ movptr(dest->as_register_lo(), c->as_jint_lo());
      __ movptr(dest->as_register_hi(), c->as_jint_hi());
#endif // _LP64
      break;
    }

    case T_OBJECT: {
      if (patch_code != lir_patch_none) {
        jobject2reg_with_patching(dest->as_register(), info);
      } else {
        __ movoop(dest->as_register(), c->as_jobject());
      }
      break;
    }

    case T_FLOAT: {
      if (dest->is_single_xmm()) {
        if (c->is_zero_float()) {
          __ xorps(dest->as_xmm_float_reg(), dest->as_xmm_float_reg());
        } else {
          __ movflt(dest->as_xmm_float_reg(),
                   InternalAddress(float_constant(c->as_jfloat())));
        }
      } else {
        assert(dest->is_single_fpu(), "must be");
        assert(dest->fpu_regnr() == 0, "dest must be TOS");
        if (c->is_zero_float()) {
          __ fldz();
        } else if (c->is_one_float()) {
          __ fld1();
        } else {
          __ fld_s (InternalAddress(float_constant(c->as_jfloat())));
        }
      }
      break;
    }

    case T_DOUBLE: {
      if (dest->is_double_xmm()) {
        if (c->is_zero_double()) {
          __ xorpd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg());
        } else {
          __ movdbl(dest->as_xmm_double_reg(),
                    InternalAddress(double_constant(c->as_jdouble())));
        }
      } else {
        assert(dest->is_double_fpu(), "must be");
        assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
        if (c->is_zero_double()) {
          __ fldz();
        } else if (c->is_one_double()) {
          __ fld1();
        } else {
          __ fld_d (InternalAddress(double_constant(c->as_jdouble())));
        }
      }
      break;
    }

    default:
      ShouldNotReachHere();
  }
}

void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
  assert(src->is_constant(), "should not call otherwise");
  assert(dest->is_stack(), "should not call otherwise");
  LIR_Const* c = src->as_constant_ptr();

  switch (c->type()) {
    case T_INT:  // fall through
    case T_FLOAT:
      __ movl(frame_map()->address_for_slot(dest->single_stack_ix()), c->as_jint_bits());
      break;

    case T_ADDRESS:
      __ movptr(frame_map()->address_for_slot(dest->single_stack_ix()), c->as_jint_bits());
      break;

    case T_OBJECT:
      __ movoop(frame_map()->address_for_slot(dest->single_stack_ix()), c->as_jobject());
      break;

    case T_LONG:  // fall through
    case T_DOUBLE:
#ifdef _LP64
      __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(),
                                            lo_word_offset_in_bytes), (intptr_t)c->as_jlong_bits());
#else
      __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(),
                                              lo_word_offset_in_bytes), c->as_jint_lo_bits());
      __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(),
                                              hi_word_offset_in_bytes), c->as_jint_hi_bits());
#endif // _LP64
      break;

    default:
      ShouldNotReachHere();
  }
}

void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) {
  assert(src->is_constant(), "should not call otherwise");
  assert(dest->is_address(), "should not call otherwise");
  LIR_Const* c = src->as_constant_ptr();
  LIR_Address* addr = dest->as_address_ptr();

  int null_check_here = code_offset();
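  // (Descriptive note: null_check_here records the offset of the instruction
  // that can take the implicit null-check fault; the T_OBJECT and T_LONG
  // cases below re-record it just before the store once scratch-register
  // setup has been emitted, so the debug info attached at the end points at
  // the right pc.)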
  switch (type) {
    case T_INT:    // fall through
    case T_FLOAT:
      __ movl(as_Address(addr), c->as_jint_bits());
      break;

    case T_ADDRESS:
      __ movptr(as_Address(addr), c->as_jint_bits());
      break;

    case T_OBJECT:  // fall through
    case T_ARRAY:
      if (c->as_jobject() == NULL) {
        if (UseCompressedOops && !wide) {
          __ movl(as_Address(addr), (int32_t)NULL_WORD);
        } else {
          __ movptr(as_Address(addr), NULL_WORD);
        }
      } else {
        if (is_literal_address(addr)) {
          ShouldNotReachHere();
          __ movoop(as_Address(addr, noreg), c->as_jobject());
        } else {
#ifdef _LP64
          __ movoop(rscratch1, c->as_jobject());
          if (UseCompressedOops && !wide) {
            __ encode_heap_oop(rscratch1);
            null_check_here = code_offset();
            __ movl(as_Address_lo(addr), rscratch1);
          } else {
            null_check_here = code_offset();
            __ movptr(as_Address_lo(addr), rscratch1);
          }
#else
          __ movoop(as_Address(addr), c->as_jobject());
#endif
        }
      }
      break;

    case T_LONG:    // fall through
    case T_DOUBLE:
#ifdef _LP64
      if (is_literal_address(addr)) {
        ShouldNotReachHere();
        __ movptr(as_Address(addr, r15_thread), (intptr_t)c->as_jlong_bits());
      } else {
        __ movptr(r10, (intptr_t)c->as_jlong_bits());
        null_check_here = code_offset();
        __ movptr(as_Address_lo(addr), r10);
      }
#else
      // Always reachable in 32-bit mode, so this doesn't produce a useless literal move
      __ movptr(as_Address_hi(addr), c->as_jint_hi_bits());
      __ movptr(as_Address_lo(addr), c->as_jint_lo_bits());
#endif // _LP64
      break;

    case T_BOOLEAN: // fall through
    case T_BYTE:
      __ movb(as_Address(addr), c->as_jint() & 0xFF);
      break;

    case T_CHAR:    // fall through
    case T_SHORT:
      __ movw(as_Address(addr), c->as_jint() & 0xFFFF);
      break;

    default:
      ShouldNotReachHere();
  };

  if (info != NULL) {
    add_debug_info_for_null_check(null_check_here, info);
  }
}


void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
  assert(src->is_register(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");

  // move between cpu-registers
  if (dest->is_single_cpu()) {
#ifdef _LP64
    if (src->type() == T_LONG) {
      // Can do LONG -> OBJECT
      move_regs(src->as_register_lo(), dest->as_register());
      return;
    }
#endif
    assert(src->is_single_cpu(), "must match");
    if (src->type() == T_OBJECT) {
      __ verify_oop(src->as_register());
    }
    move_regs(src->as_register(), dest->as_register());

  } else if (dest->is_double_cpu()) {
#ifdef _LP64
    if (src->type() == T_OBJECT || src->type() == T_ARRAY) {
      // Surprising, but we can see a move of an oop to a long destination
      __ verify_oop(src->as_register());
      move_regs(src->as_register(), dest->as_register_lo());
      return;
    }
#endif
    assert(src->is_double_cpu(), "must match");
    Register f_lo = src->as_register_lo();
    Register f_hi = src->as_register_hi();
    Register t_lo = dest->as_register_lo();
    Register t_hi = dest->as_register_hi();
#ifdef _LP64
    assert(f_hi == f_lo, "must be same");
    assert(t_hi == t_lo, "must be same");
    move_regs(f_lo, t_lo);
#else
    assert(f_lo != f_hi && t_lo != t_hi, "invalid register allocation");


    if (f_lo == t_hi && f_hi == t_lo) {
      swap_reg(f_lo, f_hi);
    } else if (f_hi == t_lo) {
      assert(f_lo != t_hi, "overwriting register");
      move_regs(f_hi, t_hi);
      move_regs(f_lo, t_lo);
    } else {
      assert(f_hi != t_lo, "overwriting register");
      move_regs(f_lo, t_lo);
      move_regs(f_hi, t_hi);
    }
#endif // LP64

    // special moves from fpu-register to xmm-register
    // necessary for method results
  } else if (src->is_single_xmm() && !dest->is_single_xmm()) {
    __ movflt(Address(rsp, 0), src->as_xmm_float_reg());
    __ fld_s(Address(rsp, 0));
  } else if (src->is_double_xmm() && !dest->is_double_xmm()) {
    __ movdbl(Address(rsp, 0), src->as_xmm_double_reg());
    __ fld_d(Address(rsp, 0));
  } else if (dest->is_single_xmm() && !src->is_single_xmm()) {
    __ fstp_s(Address(rsp, 0));
    __ movflt(dest->as_xmm_float_reg(), Address(rsp, 0));
  } else if (dest->is_double_xmm() && !src->is_double_xmm()) {
    __ fstp_d(Address(rsp, 0));
    __ movdbl(dest->as_xmm_double_reg(), Address(rsp, 0));

    // move between xmm-registers
  } else if (dest->is_single_xmm()) {
    assert(src->is_single_xmm(), "must match");
    __ movflt(dest->as_xmm_float_reg(), src->as_xmm_float_reg());
  } else if (dest->is_double_xmm()) {
    assert(src->is_double_xmm(), "must match");
    __ movdbl(dest->as_xmm_double_reg(), src->as_xmm_double_reg());

    // move between fpu-registers (no instruction necessary because of fpu-stack)
  } else if (dest->is_single_fpu() || dest->is_double_fpu()) {
    assert(src->is_single_fpu() || src->is_double_fpu(), "must match");
    assert(src->fpu() == dest->fpu(), "currently should be nothing to do");
  } else {
    ShouldNotReachHere();
  }
}

void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
  assert(src->is_register(), "should not call otherwise");
  assert(dest->is_stack(), "should not call otherwise");

  if (src->is_single_cpu()) {
    Address dst = frame_map()->address_for_slot(dest->single_stack_ix());
    if (type == T_OBJECT || type == T_ARRAY) {
      __ verify_oop(src->as_register());
      __ movptr (dst, src->as_register());
    } else {
      __ movl (dst, src->as_register());
    }

  } else if (src->is_double_cpu()) {
    Address dstLO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes);
    Address dstHI = frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes);
    __ movptr (dstLO, src->as_register_lo());
    NOT_LP64(__ movptr (dstHI, src->as_register_hi()));

  } else if (src->is_single_xmm()) {
    Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
    __ movflt(dst_addr, src->as_xmm_float_reg());

  } else if (src->is_double_xmm()) {
    Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
    __ movdbl(dst_addr, src->as_xmm_double_reg());

  } else if (src->is_single_fpu()) {
    assert(src->fpu_regnr() == 0, "argument must be on TOS");
    Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix());
    if (pop_fpu_stack)     __ fstp_s (dst_addr);
    else                   __ fst_s  (dst_addr);

  } else if (src->is_double_fpu()) {
    assert(src->fpu_regnrLo() == 0, "argument must be on TOS");
    Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix());
    if (pop_fpu_stack)     __ fstp_d (dst_addr);
    else                   __ fst_d  (dst_addr);

  } else {
    ShouldNotReachHere();
  }
}


void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) {
  LIR_Address* to_addr = dest->as_address_ptr();
  PatchingStub* patch = NULL;
  Register compressed_src = rscratch1;

  if (type == T_ARRAY || type == T_OBJECT) {
    __ verify_oop(src->as_register());
#ifdef _LP64
    if (UseCompressedOops && !wide) {
      __ movptr(compressed_src, src->as_register());
      __ encode_heap_oop(compressed_src);
    }
#endif
  }

  if (patch_code != lir_patch_none) {
    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
    Address toa = as_Address(to_addr);
    assert(toa.disp() != 0, "must have");
  }

  int null_check_here = code_offset();
  switch (type) {
    case T_FLOAT: {
      if (src->is_single_xmm()) {
        __ movflt(as_Address(to_addr), src->as_xmm_float_reg());
      } else {
        assert(src->is_single_fpu(), "must be");
        assert(src->fpu_regnr() == 0, "argument must be on TOS");
        if (pop_fpu_stack)      __ fstp_s(as_Address(to_addr));
        else                    __ fst_s (as_Address(to_addr));
      }
      break;
    }

    case T_DOUBLE: {
      if (src->is_double_xmm()) {
        __ movdbl(as_Address(to_addr), src->as_xmm_double_reg());
      } else {
        assert(src->is_double_fpu(), "must be");
        assert(src->fpu_regnrLo() == 0, "argument must be on TOS");
        if (pop_fpu_stack)      __ fstp_d(as_Address(to_addr));
        else                    __ fst_d (as_Address(to_addr));
      }
      break;
    }

    case T_ARRAY:   // fall through
    case T_OBJECT:  // fall through
      if (UseCompressedOops && !wide) {
        __ movl(as_Address(to_addr), compressed_src);
      } else {
        __ movptr(as_Address(to_addr), src->as_register());
      }
      break;
    case T_ADDRESS:
      __ movptr(as_Address(to_addr), src->as_register());
      break;
    case T_INT:
      __ movl(as_Address(to_addr), src->as_register());
      break;

    case T_LONG: {
      Register from_lo = src->as_register_lo();
      Register from_hi = src->as_register_hi();
#ifdef _LP64
      __ movptr(as_Address_lo(to_addr), from_lo);
#else
      Register base = to_addr->base()->as_register();
      Register index = noreg;
      if (to_addr->index()->is_register()) {
        index = to_addr->index()->as_register();
      }
      if (base == from_lo || index == from_lo) {
        assert(base != from_hi, "can't be");
        assert(index == noreg || (index != base && index != from_hi), "can't handle this");
        __ movl(as_Address_hi(to_addr), from_hi);
        if (patch != NULL) {
          patching_epilog(patch, lir_patch_high, base, info);
          patch = new PatchingStub(_masm, PatchingStub::access_field_id);
          patch_code = lir_patch_low;
        }
        __ movl(as_Address_lo(to_addr), from_lo);
      } else {
        assert(index == noreg || (index != base && index != from_lo), "can't handle this");
        __ movl(as_Address_lo(to_addr), from_lo);
        if (patch != NULL) {
          patching_epilog(patch, lir_patch_low, base, info);
          patch = new PatchingStub(_masm, PatchingStub::access_field_id);
          patch_code = lir_patch_high;
        }
        __ movl(as_Address_hi(to_addr), from_hi);
      }
#endif // _LP64
      break;
    }

    case T_BYTE:    // fall through
    case T_BOOLEAN: {
      Register src_reg = src->as_register();
      Address dst_addr = as_Address(to_addr);
      assert(VM_Version::is_P6() || src_reg->has_byte_register(), "must use byte registers if not P6");
      __ movb(dst_addr, src_reg);
      break;
    }

    case T_CHAR:    // fall through
    case T_SHORT:
      __ movw(as_Address(to_addr), src->as_register());
      break;

    default:
      ShouldNotReachHere();
  }
  if (info != NULL) {
    add_debug_info_for_null_check(null_check_here, info);
  }

  if (patch_code != lir_patch_none) {
    patching_epilog(patch, patch_code, to_addr->base()->as_register(), info);
  }
}


void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
  assert(src->is_stack(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");

  if (dest->is_single_cpu()) {
    if (type == T_ARRAY || type == T_OBJECT) {
      __ movptr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
      __ verify_oop(dest->as_register());
    } else {
      __ movl(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()));
    }

  } else if (dest->is_double_cpu()) {
    Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes);
    Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes);
    __ movptr(dest->as_register_lo(), src_addr_LO);
    NOT_LP64(__ movptr(dest->as_register_hi(), src_addr_HI));

  } else if (dest->is_single_xmm()) {
    Address src_addr = frame_map()->address_for_slot(src->single_stack_ix());
    __ movflt(dest->as_xmm_float_reg(), src_addr);

  } else if (dest->is_double_xmm()) {
    Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
    __ movdbl(dest->as_xmm_double_reg(), src_addr);

  } else if (dest->is_single_fpu()) {
    assert(dest->fpu_regnr() == 0, "dest must be TOS");
    Address src_addr = frame_map()->address_for_slot(src->single_stack_ix());
    __ fld_s(src_addr);

  } else if (dest->is_double_fpu()) {
    assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
    Address src_addr = frame_map()->address_for_slot(src->double_stack_ix());
    __ fld_d(src_addr);

  } else {
    ShouldNotReachHere();
  }
}


void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
  if (src->is_single_stack()) {
    if (type == T_OBJECT || type == T_ARRAY) {
      __ pushptr(frame_map()->address_for_slot(src ->single_stack_ix()));
      __ popptr (frame_map()->address_for_slot(dest->single_stack_ix()));
    } else {
#ifndef _LP64
      __ pushl(frame_map()->address_for_slot(src ->single_stack_ix()));
      __ popl (frame_map()->address_for_slot(dest->single_stack_ix()));
#else
      // no pushl in 64-bit mode
      __ movl(rscratch1, frame_map()->address_for_slot(src ->single_stack_ix()));
      __ movl(frame_map()->address_for_slot(dest->single_stack_ix()), rscratch1);
#endif
    }

  } else if (src->is_double_stack()) {
#ifdef _LP64
    __ pushptr(frame_map()->address_for_slot(src ->double_stack_ix()));
    __ popptr (frame_map()->address_for_slot(dest->double_stack_ix()));
#else
    __ pushl(frame_map()->address_for_slot(src ->double_stack_ix(), 0));
    // push and pop the part at src + wordSize, adding wordSize for the previous push
    __ pushl(frame_map()->address_for_slot(src ->double_stack_ix(), 2 * wordSize));
    __ popl (frame_map()->address_for_slot(dest->double_stack_ix(), 2 * wordSize));
    __ popl (frame_map()->address_for_slot(dest->double_stack_ix(), 0));
#endif // _LP64

  } else {
    ShouldNotReachHere();
  }
}


void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) {
  assert(src->is_address(), "should not call otherwise");
  assert(dest->is_register(), "should not call otherwise");

  LIR_Address* addr = src->as_address_ptr();
  Address from_addr = as_Address(addr);

  switch (type) {
    case T_BOOLEAN: // fall through
    case T_BYTE:    // fall through
    case T_CHAR:    // fall through
    case T_SHORT:
      if (!VM_Version::is_P6() && !from_addr.uses(dest->as_register())) {
        // on pre-P6 processors we may get partial register stalls
        // so blow away the value of to_rinfo before loading a
        // partial word into it.  Do it here so that it precedes
        // the potential patch point below.
        __ xorptr(dest->as_register(), dest->as_register());
      }
      break;
  }

  PatchingStub* patch = NULL;
  if (patch_code != lir_patch_none) {
    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
    assert(from_addr.disp() != 0, "must have");
  }
  if (info != NULL) {
    add_debug_info_for_null_check_here(info);
  }

  switch (type) {
    case T_FLOAT: {
      if (dest->is_single_xmm()) {
        __ movflt(dest->as_xmm_float_reg(), from_addr);
      } else {
        assert(dest->is_single_fpu(), "must be");
        assert(dest->fpu_regnr() == 0, "dest must be TOS");
        __ fld_s(from_addr);
      }
      break;
    }

    case T_DOUBLE: {
      if (dest->is_double_xmm()) {
        __ movdbl(dest->as_xmm_double_reg(), from_addr);
      } else {
        assert(dest->is_double_fpu(), "must be");
        assert(dest->fpu_regnrLo() == 0, "dest must be TOS");
        __ fld_d(from_addr);
      }
      break;
    }

    case T_OBJECT:  // fall through
    case T_ARRAY:   // fall through
      if (UseCompressedOops && !wide) {
        __ movl(dest->as_register(), from_addr);
      } else {
        __ movptr(dest->as_register(), from_addr);
      }
      break;

    case T_ADDRESS:
      __ movptr(dest->as_register(), from_addr);
      break;
    case T_INT:
      __ movl(dest->as_register(), from_addr);
      break;

    case T_LONG: {
      Register to_lo = dest->as_register_lo();
      Register to_hi = dest->as_register_hi();
#ifdef _LP64
      __ movptr(to_lo, as_Address_lo(addr));
#else
      Register base = addr->base()->as_register();
      Register index = noreg;
      if (addr->index()->is_register()) {
        index = addr->index()->as_register();
      }
      if ((base == to_lo && index == to_hi) ||
          (base == to_hi && index == to_lo)) {
        // addresses with 2 registers are only formed as a result of
        // array access so this code will never have to deal with
        // patches or null checks.
        assert(info == NULL && patch == NULL, "must be");
        __ lea(to_hi, as_Address(addr));
        __ movl(to_lo, Address(to_hi, 0));
        __ movl(to_hi, Address(to_hi, BytesPerWord));
      } else if (base == to_lo || index == to_lo) {
        assert(base != to_hi, "can't be");
        assert(index == noreg || (index != base && index != to_hi), "can't handle this");
        __ movl(to_hi, as_Address_hi(addr));
        if (patch != NULL) {
          patching_epilog(patch, lir_patch_high, base, info);
          patch = new PatchingStub(_masm, PatchingStub::access_field_id);
          patch_code = lir_patch_low;
        }
        __ movl(to_lo, as_Address_lo(addr));
      } else {
        assert(index == noreg || (index != base && index != to_lo), "can't handle this");
        __ movl(to_lo, as_Address_lo(addr));
        if (patch != NULL) {
          patching_epilog(patch, lir_patch_low, base, info);
          patch = new PatchingStub(_masm, PatchingStub::access_field_id);
          patch_code = lir_patch_high;
        }
        __ movl(to_hi, as_Address_hi(addr));
      }
#endif // _LP64
      break;
    }

    case T_BOOLEAN: // fall through
    case T_BYTE: {
      Register dest_reg = dest->as_register();
      assert(VM_Version::is_P6() || dest_reg->has_byte_register(), "must use byte registers if not P6");
      if (VM_Version::is_P6() || from_addr.uses(dest_reg)) {
        __ movsbl(dest_reg, from_addr);
      } else {
        __ movb(dest_reg, from_addr);
        __ shll(dest_reg, 24);
        __ sarl(dest_reg, 24);
      }
      break;
    }

    case T_CHAR: {
      Register dest_reg = dest->as_register();
      assert(VM_Version::is_P6() || dest_reg->has_byte_register(), "must use byte registers if not P6");
      if (VM_Version::is_P6() || from_addr.uses(dest_reg)) {
        __ movzwl(dest_reg, from_addr);
      } else {
        __ movw(dest_reg, from_addr);
      }
      break;
    }

    case T_SHORT: {
      Register dest_reg = dest->as_register();
      if (VM_Version::is_P6() || from_addr.uses(dest_reg)) {
        __ movswl(dest_reg, from_addr);
      } else {
        __ movw(dest_reg, from_addr);
        __ shll(dest_reg, 16);
        __ sarl(dest_reg, 16);
      }
      break;
    }

    default:
      ShouldNotReachHere();
  }

  if (patch != NULL) {
    patching_epilog(patch, patch_code, addr->base()->as_register(), info);
  }

  if (type == T_ARRAY || type == T_OBJECT) {
#ifdef _LP64
    if (UseCompressedOops && !wide) {
      __ decode_heap_oop(dest->as_register());
    }
#endif
    __ verify_oop(dest->as_register());
  }
}


void LIR_Assembler::prefetchr(LIR_Opr src) {
  LIR_Address* addr = src->as_address_ptr();
  Address from_addr = as_Address(addr);

  if (VM_Version::supports_sse()) {
    switch (ReadPrefetchInstr) {
      case 0:
        __ prefetchnta(from_addr); break;
      case 1:
        __ prefetcht0(from_addr); break;
      case 2:
        __ prefetcht2(from_addr); break;
      default:
        ShouldNotReachHere(); break;
    }
  } else if (VM_Version::supports_3dnow_prefetch()) {
    __ prefetchr(from_addr);
  }
}


void LIR_Assembler::prefetchw(LIR_Opr src) {
  LIR_Address* addr = src->as_address_ptr();
  Address from_addr = as_Address(addr);

  if (VM_Version::supports_sse()) {
    switch (AllocatePrefetchInstr) {
      case 0:
        __ prefetchnta(from_addr); break;
      case 1:
        __ prefetcht0(from_addr); break;
      case 2:
        __ prefetcht2(from_addr); break;
      case 3:
        __ prefetchw(from_addr); break;
      default:
        ShouldNotReachHere(); break;
    }
  } else if (VM_Version::supports_3dnow_prefetch()) {
    __ prefetchw(from_addr);
  }
}


NEEDS_CLEANUP; // This could be static?
Address::ScaleFactor LIR_Assembler::array_element_size(BasicType type) const {
  int elem_size = type2aelembytes(type);
  switch (elem_size) {
    case 1: return Address::times_1;
    case 2: return Address::times_2;
    case 4: return Address::times_4;
    case 8: return Address::times_8;
  }
  ShouldNotReachHere();
  return Address::no_scale;
}


void LIR_Assembler::emit_op3(LIR_Op3* op) {
  switch (op->code()) {
    case lir_idiv:
    case lir_irem:
      arithmetic_idiv(op->code(),
                      op->in_opr1(),
                      op->in_opr2(),
                      op->in_opr3(),
                      op->result_opr(),
                      op->info());
      break;
    default:      ShouldNotReachHere(); break;
  }
}
1461 
1462 void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
1463 #ifdef ASSERT
1464   assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
1465   if (op->block() != NULL)  _branch_target_blocks.append(op->block());
1466   if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
1467 #endif
1468 
1469   if (op->cond() == lir_cond_always) {
1470     if (op->info() != NULL) add_debug_info_for_branch(op->info());
1471     __ jmp (*(op->label()));
1472   } else {
1473     Assembler::Condition acond = Assembler::zero;
1474     if (op->code() == lir_cond_float_branch) {
1475       assert(op->ublock() != NULL, "must have unordered successor");
1476       __ jcc(Assembler::parity, *(op->ublock()->label()));
1477       switch(op->cond()) {
1478         case lir_cond_equal:        acond = Assembler::equal;      break;
1479         case lir_cond_notEqual:     acond = Assembler::notEqual;   break;
1480         case lir_cond_less:         acond = Assembler::below;      break;
1481         case lir_cond_lessEqual:    acond = Assembler::belowEqual; break;
1482         case lir_cond_greaterEqual: acond = Assembler::aboveEqual; break;
1483         case lir_cond_greater:      acond = Assembler::above;      break;
1484         default:                         ShouldNotReachHere();
1485       }
1486     } else {
1487       switch (op->cond()) {
1488         case lir_cond_equal:        acond = Assembler::equal;       break;
1489         case lir_cond_notEqual:     acond = Assembler::notEqual;    break;
1490         case lir_cond_less:         acond = Assembler::less;        break;
1491         case lir_cond_lessEqual:    acond = Assembler::lessEqual;   break;
1492         case lir_cond_greaterEqual: acond = Assembler::greaterEqual;break;
1493         case lir_cond_greater:      acond = Assembler::greater;     break;
1494         case lir_cond_belowEqual:   acond = Assembler::belowEqual;  break;
1495         case lir_cond_aboveEqual:   acond = Assembler::aboveEqual;  break;
1496         default:                         ShouldNotReachHere();
1497       }
1498     }
1499     __ jcc(acond,*(op->label()));
1500   }
1501 }
1502 
1503 void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
1504   LIR_Opr src  = op->in_opr();
1505   LIR_Opr dest = op->result_opr();
1506 
1507   switch (op->bytecode()) {
1508     case Bytecodes::_i2l:
1509 #ifdef _LP64
1510       __ movl2ptr(dest->as_register_lo(), src->as_register());
1511 #else
1512       move_regs(src->as_register(), dest->as_register_lo());
1513       move_regs(src->as_register(), dest->as_register_hi());
1514       __ sarl(dest->as_register_hi(), 31);
1515 #endif // LP64
1516       break;
1517 
1518     case Bytecodes::_l2i:
1519       move_regs(src->as_register_lo(), dest->as_register());
1520       break;
1521 
1522     case Bytecodes::_i2b:
1523       move_regs(src->as_register(), dest->as_register());
1524       __ sign_extend_byte(dest->as_register());
1525       break;
1526 
1527     case Bytecodes::_i2c:
1528       move_regs(src->as_register(), dest->as_register());
1529       __ andl(dest->as_register(), 0xFFFF);
1530       break;
1531 
1532     case Bytecodes::_i2s:
1533       move_regs(src->as_register(), dest->as_register());
1534       __ sign_extend_short(dest->as_register());
1535       break;
1536 
1538     case Bytecodes::_f2d:
1539     case Bytecodes::_d2f:
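      // both directions land here; the operand kinds select the SSE
      // conversion, while on the x87 stack both widths share the same
      // 80-bit representation and need no instruction.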
1540       if (dest->is_single_xmm()) {
1541         __ cvtsd2ss(dest->as_xmm_float_reg(), src->as_xmm_double_reg());
1542       } else if (dest->is_double_xmm()) {
1543         __ cvtss2sd(dest->as_xmm_double_reg(), src->as_xmm_float_reg());
1544       } else {
1545         assert(src->fpu() == dest->fpu(), "registers must be equal");
1546         // do nothing (float result is rounded later through spilling)
1547       }
1548       break;
1549 
1550     case Bytecodes::_i2f:
1551     case Bytecodes::_i2d:
1552       if (dest->is_single_xmm()) {
1553         __ cvtsi2ssl(dest->as_xmm_float_reg(), src->as_register());
1554       } else if (dest->is_double_xmm()) {
1555         __ cvtsi2sdl(dest->as_xmm_double_reg(), src->as_register());
1556       } else {
1557         assert(dest->fpu() == 0, "result must be on TOS");
1558         __ movl(Address(rsp, 0), src->as_register());
1559         __ fild_s(Address(rsp, 0));
1560       }
1561       break;
1562 
1563     case Bytecodes::_f2i:
1564     case Bytecodes::_d2i:
1565       if (src->is_single_xmm()) {
1566         __ cvttss2sil(dest->as_register(), src->as_xmm_float_reg());
1567       } else if (src->is_double_xmm()) {
1568         __ cvttsd2sil(dest->as_register(), src->as_xmm_double_reg());
1569       } else {
1570         assert(src->fpu() == 0, "input must be on TOS");
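        // x87 path: swap in a control word with round-to-zero so fist
        // truncates as the JLS requires, then restore the standard one.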
1571         __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
1572         __ fist_s(Address(rsp, 0));
1573         __ movl(dest->as_register(), Address(rsp, 0));
1574         __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1575       }
1576 
1577       // IA32 conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub
1578       assert(op->stub() != NULL, "stub required");
1579       __ cmpl(dest->as_register(), 0x80000000);
1580       __ jcc(Assembler::equal, *op->stub()->entry());
1581       __ bind(*op->stub()->continuation());
1582       break;
1583 
1584     case Bytecodes::_l2f:
1585     case Bytecodes::_l2d:
1586       assert(!dest->is_xmm_register(), "result in xmm register not supported (no SSE instruction present)");
1587       assert(dest->fpu() == 0, "result must be on TOS");
1588 
1589       __ movptr(Address(rsp, 0),            src->as_register_lo());
1590       NOT_LP64(__ movl(Address(rsp, BytesPerWord), src->as_register_hi()));
1591       __ fild_d(Address(rsp, 0));
1592       // float result is rounded later through spilling
1593       break;
1594 
1595     case Bytecodes::_f2l:
1596     case Bytecodes::_d2l:
1597       assert(!src->is_xmm_register(), "input in xmm register not supported (no SSE instruction present)");
1598       assert(src->fpu() == 0, "input must be on TOS");
1599       assert(dest == FrameMap::long0_opr, "runtime stub places result in these registers");
1600 
1601       // instruction sequence too long to inline it here
1602       {
1603         __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::fpu2long_stub_id)));
1604       }
1605       break;
1606 
1607     default: ShouldNotReachHere();
1608   }
1609 }
1610 
1611 void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
1612   if (op->init_check()) {
1613     __ cmpl(Address(op->klass()->as_register(),
1614                     instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)),
1615             instanceKlass::fully_initialized);
1616     add_debug_info_for_null_check_here(op->stub()->info());
1617     __ jcc(Assembler::notEqual, *op->stub()->entry());
1618   }
1619   __ allocate_object(op->obj()->as_register(),
1620                      op->tmp1()->as_register(),
1621                      op->tmp2()->as_register(),
1622                      op->header_size(),
1623                      op->object_size(),
1624                      op->klass()->as_register(),
1625                      *op->stub()->entry());
1626   __ bind(*op->stub()->continuation());
1627 }
1628 
1629 void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
1630   Register len =  op->len()->as_register();
1631   LP64_ONLY( __ movslq(len, len); )
1632 
1633   if (UseSlowPath ||
1634       (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) ||
1635       (!UseFastNewTypeArray   && (op->type() != T_OBJECT && op->type() != T_ARRAY))) {
1636     __ jmp(*op->stub()->entry());
1637   } else {
1638     Register tmp1 = op->tmp1()->as_register();
1639     Register tmp2 = op->tmp2()->as_register();
1640     Register tmp3 = op->tmp3()->as_register();
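    // make sure a spare copy of len survives in tmp3: if len aliases one
    // of the temps, retire that temp to tmp3 instead; otherwise copy len.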
1641     if (len == tmp1) {
1642       tmp1 = tmp3;
1643     } else if (len == tmp2) {
1644       tmp2 = tmp3;
1645     } else if (len == tmp3) {
1646       // everything is ok
1647     } else {
1648       __ mov(tmp3, len);
1649     }
1650     __ allocate_array(op->obj()->as_register(),
1651                       len,
1652                       tmp1,
1653                       tmp2,
1654                       arrayOopDesc::header_size(op->type()),
1655                       array_element_size(op->type()),
1656                       op->klass()->as_register(),
1657                       *op->stub()->entry());
1658   }
1659   __ bind(*op->stub()->continuation());
1660 }
1661 
1662 void LIR_Assembler::type_profile_helper(Register mdo,
1663                                         ciMethodData *md, ciProfileData *data,
1664                                         Register recv, Label* update_done) {
1665   for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
1666     Label next_test;
1667     // See if the receiver is receiver[n].
1668     __ cmpptr(recv, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
1669     __ jccb(Assembler::notEqual, next_test);
1670     Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
1671     __ addptr(data_addr, DataLayout::counter_increment);
1672     __ jmp(*update_done);
1673     __ bind(next_test);
1674   }
1675 
1676   // Didn't find receiver; find next empty slot and fill it in
1677   for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
1678     Label next_test;
1679     Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)));
1680     __ cmpptr(recv_addr, (intptr_t)NULL_WORD);
1681     __ jccb(Assembler::notEqual, next_test);
1682     __ movptr(recv_addr, recv);
1683     __ movptr(Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))), DataLayout::counter_increment);
1684     __ jmp(*update_done);
1685     __ bind(next_test);
1686   }
1687 }
1688 
1689 void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
1690   // we always need a stub for the failure case.
1691   CodeStub* stub = op->stub();
1692   Register obj = op->object()->as_register();
1693   Register k_RInfo = op->tmp1()->as_register();
1694   Register klass_RInfo = op->tmp2()->as_register();
1695   Register dst = op->result_opr()->as_register();
1696   ciKlass* k = op->klass();
1697   Register Rtmp1 = noreg;
1698 
1699   // check if it needs to be profiled
1700   ciMethodData* md;
1701   ciProfileData* data;
1702 
1703   if (op->should_profile()) {
1704     ciMethod* method = op->profiled_method();
1705     assert(method != NULL, "Should have method");
1706     int bci = op->profiled_bci();
1707     md = method->method_data_or_null();
1708     assert(md != NULL, "Sanity");
1709     data = md->bci_to_data(bci);
1710     assert(data != NULL,                "need data for type check");
1711     assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
1712   }
1713   Label profile_cast_success, profile_cast_failure;
1714   Label *success_target = op->should_profile() ? &profile_cast_success : success;
1715   Label *failure_target = op->should_profile() ? &profile_cast_failure : failure;
1716 
1717   if (obj == k_RInfo) {
1718     k_RInfo = dst;
1719   } else if (obj == klass_RInfo) {
1720     klass_RInfo = dst;
1721   }
1722   if (k->is_loaded() && !UseCompressedOops) {
1723     select_different_registers(obj, dst, k_RInfo, klass_RInfo);
1724   } else {
1725     Rtmp1 = op->tmp3()->as_register();
1726     select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
1727   }
1728 
1729   assert_different_registers(obj, k_RInfo, klass_RInfo);
1730   if (!k->is_loaded()) {
1731     jobject2reg_with_patching(k_RInfo, op->info_for_patch());
1732   } else {
1733 #ifdef _LP64
1734     __ movoop(k_RInfo, k->constant_encoding());
1735 #endif // _LP64
1736   }
1737   assert(obj != k_RInfo, "must be different");
1738 
1739   __ cmpptr(obj, (int32_t)NULL_WORD);
1740   if (op->should_profile()) {
1741     Label not_null;
1742     __ jccb(Assembler::notEqual, not_null);
1743     // Object is null; update MDO and exit
1744     Register mdo  = klass_RInfo;
1745     __ movoop(mdo, md->constant_encoding());
1746     Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
1747     int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
1748     __ orl(data_addr, header_bits);
1749     __ jmp(*obj_is_null);
1750     __ bind(not_null);
1751   } else {
1752     __ jcc(Assembler::equal, *obj_is_null);
1753   }
1754   __ verify_oop(obj);
1755 
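  // fast_check means the compiler proved that an exact-class test
  // suffices, so a single klass-pointer compare decides the cast.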
1756   if (op->fast_check()) {
1757     // get object class
1758     // not a safepoint as obj null check happens earlier
1759 #ifdef _LP64
1760     if (UseCompressedOops) {
1761       __ load_klass(Rtmp1, obj);
1762       __ cmpptr(k_RInfo, Rtmp1);
1763     } else {
1764       __ cmpptr(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
1765     }
1766 #else
1767     if (k->is_loaded()) {
1768       __ cmpoop(Address(obj, oopDesc::klass_offset_in_bytes()), k->constant_encoding());
1769     } else {
1770       __ cmpptr(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes()));
1771     }
1772 #endif
1773     __ jcc(Assembler::notEqual, *failure_target);
1774     // successful cast, fall through to profile or jump
1775   } else {
1776     // get object class
1777     // not a safepoint as obj null check happens earlier
1778     __ load_klass(klass_RInfo, obj);
1779     if (k->is_loaded()) {
1780       // See if we get an immediate positive hit
1781 #ifdef _LP64
1782       __ cmpptr(k_RInfo, Address(klass_RInfo, k->super_check_offset()));
1783 #else
1784       __ cmpoop(Address(klass_RInfo, k->super_check_offset()), k->constant_encoding());
1785 #endif // _LP64
1786       if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
1787         __ jcc(Assembler::notEqual, *failure_target);
1788         // successful cast, fall through to profile or jump
1789       } else {
1790         // See if we get an immediate positive hit
1791         __ jcc(Assembler::equal, *success_target);
1792         // check for self
1793 #ifdef _LP64
1794         __ cmpptr(klass_RInfo, k_RInfo);
1795 #else
1796         __ cmpoop(klass_RInfo, k->constant_encoding());
1797 #endif // _LP64
1798         __ jcc(Assembler::equal, *success_target);
1799 
1800         __ push(klass_RInfo);
1801 #ifdef _LP64
1802         __ push(k_RInfo);
1803 #else
1804         __ pushoop(k->constant_encoding());
1805 #endif // _LP64
1806         __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
1807         __ pop(klass_RInfo);
1808         __ pop(klass_RInfo);
1809         // result is a boolean
1810         __ cmpl(klass_RInfo, 0);
1811         __ jcc(Assembler::equal, *failure_target);
1812         // successful cast, fall through to profile or jump
1813       }
1814     } else {
1815       // perform the fast part of the checking logic
1816       __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
1817       // call out-of-line instance of __ check_klass_subtype_slow_path(...):
1818       __ push(klass_RInfo);
1819       __ push(k_RInfo);
1820       __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
1821       __ pop(klass_RInfo);
1822       __ pop(k_RInfo);
1823       // result is a boolean
1824       __ cmpl(k_RInfo, 0);
1825       __ jcc(Assembler::equal, *failure_target);
1826       // successful cast, fall through to profile or jump
1827     }
1828   }
1829   if (op->should_profile()) {
1830     Register mdo  = klass_RInfo, recv = k_RInfo;
1831     __ bind(profile_cast_success);
1832     __ movoop(mdo, md->constant_encoding());
1833     __ load_klass(recv, obj);
1834     type_profile_helper(mdo, md, data, recv, success);
1836     __ jmp(*success);
1837 
1838     __ bind(profile_cast_failure);
1839     __ movoop(mdo, md->constant_encoding());
1840     Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
1841     __ subptr(counter_addr, DataLayout::counter_increment);
1842     __ jmp(*failure);
1843   }
1844   __ jmp(*success);
1845 }
1846 
1847 
1848 void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
1849   LIR_Code code = op->code();
1850   if (code == lir_store_check) {
1851     Register value = op->object()->as_register();
1852     Register array = op->array()->as_register();
1853     Register k_RInfo = op->tmp1()->as_register();
1854     Register klass_RInfo = op->tmp2()->as_register();
1855     Register Rtmp1 = op->tmp3()->as_register();
1856 
1857     CodeStub* stub = op->stub();
1858 
1859     // check if it needs to be profiled
1860     ciMethodData* md;
1861     ciProfileData* data;
1862 
1863     if (op->should_profile()) {
1864       ciMethod* method = op->profiled_method();
1865       assert(method != NULL, "Should have method");
1866       int bci = op->profiled_bci();
1867       md = method->method_data_or_null();
1868       assert(md != NULL, "Sanity");
1869       data = md->bci_to_data(bci);
1870       assert(data != NULL,                "need data for type check");
1871       assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
1872     }
1873     Label profile_cast_success, profile_cast_failure, done;
1874     Label *success_target = op->should_profile() ? &profile_cast_success : &done;
1875     Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
1876 
1877     __ cmpptr(value, (int32_t)NULL_WORD);
1878     if (op->should_profile()) {
1879       Label not_null;
1880       __ jccb(Assembler::notEqual, not_null);
1881       // Object is null; update MDO and exit
1882       Register mdo  = klass_RInfo;
1883       __ movoop(mdo, md->constant_encoding());
1884       Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset()));
1885       int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant());
1886       __ orl(data_addr, header_bits);
1887       __ jmp(done);
1888       __ bind(not_null);
1889     } else {
1890       __ jcc(Assembler::equal, done);
1891     }
1892 
1893     add_debug_info_for_null_check_here(op->info_for_exception());
1894     __ load_klass(k_RInfo, array);
1895     __ load_klass(klass_RInfo, value);
1896 
1897     // get instance klass (it's already uncompressed)
1898     __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
1899     // perform the fast part of the checking logic
1900     __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
1901     // call out-of-line instance of __ check_klass_subtype_slow_path(...):
1902     __ push(klass_RInfo);
1903     __ push(k_RInfo);
1904     __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
1905     __ pop(klass_RInfo);
1906     __ pop(k_RInfo);
1907     // result is a boolean
1908     __ cmpl(k_RInfo, 0);
1909     __ jcc(Assembler::equal, *failure_target);
1910     // fall through to the success case
1911 
1912     if (op->should_profile()) {
1913       Register mdo  = klass_RInfo, recv = k_RInfo;
1914       __ bind(profile_cast_success);
1915       __ movoop(mdo, md->constant_encoding());
1916       __ load_klass(recv, value);
1917       type_profile_helper(mdo, md, data, recv, &done);
1919       __ jmpb(done);
1920 
1921       __ bind(profile_cast_failure);
1922       __ movoop(mdo, md->constant_encoding());
1923       Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
1924       __ subptr(counter_addr, DataLayout::counter_increment);
1925       __ jmp(*stub->entry());
1926     }
1927 
1928     __ bind(done);
1929   } else if (code == lir_checkcast) {
1931     Register obj = op->object()->as_register();
1932     Register dst = op->result_opr()->as_register();
1933     Label success;
1934     emit_typecheck_helper(op, &success, op->stub()->entry(), &success);
1935     __ bind(success);
1936     if (dst != obj) {
1937       __ mov(dst, obj);
1938     }
1939   } else if (code == lir_instanceof) {
1941     Register obj = op->object()->as_register();
1942     Register dst = op->result_opr()->as_register();
1943     Label success, failure, done;
1944     emit_typecheck_helper(op, &success, &failure, &failure);
1945     __ bind(failure);
1946     __ xorptr(dst, dst);
1947     __ jmpb(done);
1948     __ bind(success);
1949     __ movptr(dst, 1);
1950     __ bind(done);
1951   } else {
1952     ShouldNotReachHere();
1953   }
1955 }
1956 
1957 
1958 void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
1959   if (LP64_ONLY(false &&) op->code() == lir_cas_long && VM_Version::supports_cx8()) {
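    // 32-bit long CAS: cmpxchg8b compares rdx:rax against the memory
    // operand and, if equal, stores rcx:rbx; hence the register asserts.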
1960     assert(op->cmp_value()->as_register_lo() == rax, "wrong register");
1961     assert(op->cmp_value()->as_register_hi() == rdx, "wrong register");
1962     assert(op->new_value()->as_register_lo() == rbx, "wrong register");
1963     assert(op->new_value()->as_register_hi() == rcx, "wrong register");
1964     Register addr = op->addr()->as_register();
1965     if (os::is_MP()) {
1966       __ lock();
1967     }
1968     NOT_LP64(__ cmpxchg8(Address(addr, 0)));
1969 
1970   } else if (op->code() == lir_cas_int || op->code() == lir_cas_obj ) {
1971     NOT_LP64(assert(op->addr()->is_single_cpu(), "must be single");)
1972     Register addr = (op->addr()->is_single_cpu() ? op->addr()->as_register() : op->addr()->as_register_lo());
1973     Register newval = op->new_value()->as_register();
1974     Register cmpval = op->cmp_value()->as_register();
1975     assert(cmpval == rax, "wrong register");
1976     assert(newval != NULL, "new val must be register");
1977     assert(cmpval != newval, "cmp and new values must be in different registers");
1978     assert(cmpval != addr, "cmp and addr must be in different registers");
1979     assert(newval != addr, "new value and addr must be in different registers");
1980 
1981     if (op->code() == lir_cas_obj) {
1982 #ifdef _LP64
1983       if (UseCompressedOops) {
1984         __ encode_heap_oop(cmpval);
1985         __ mov(rscratch1, newval);
1986         __ encode_heap_oop(rscratch1);
1987         if (os::is_MP()) {
1988           __ lock();
1989         }
1990         // cmpval (rax) is implicitly used by this instruction
1991         __ cmpxchgl(rscratch1, Address(addr, 0));
1992       } else
1993 #endif
1994       {
1995         if (os::is_MP()) {
1996           __ lock();
1997         }
1998         __ cmpxchgptr(newval, Address(addr, 0));
1999       }
2000     } else {
2001       assert(op->code() == lir_cas_int, "lir_cas_int expected");
2002       if (os::is_MP()) {
2003         __ lock();
2004       }
2005       __ cmpxchgl(newval, Address(addr, 0));
2006     }
2007 #ifdef _LP64
2008   } else if (op->code() == lir_cas_long) {
2009     Register addr = (op->addr()->is_single_cpu() ? op->addr()->as_register() : op->addr()->as_register_lo());
2010     Register newval = op->new_value()->as_register_lo();
2011     Register cmpval = op->cmp_value()->as_register_lo();
2012     assert(cmpval == rax, "wrong register");
2013     assert(newval != NULL, "new val must be register");
2014     assert(cmpval != newval, "cmp and new values must be in different registers");
2015     assert(cmpval != addr, "cmp and addr must be in different registers");
2016     assert(newval != addr, "new value and addr must be in different registers");
2017     if (os::is_MP()) {
2018       __ lock();
2019     }
2020     __ cmpxchgq(newval, Address(addr, 0));
2021 #endif // _LP64
2022   } else {
2023     Unimplemented();
2024   }
2025 }
2026 
2027 void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
2028   Assembler::Condition acond, ncond;
2029   switch (condition) {
2030     case lir_cond_equal:        acond = Assembler::equal;        ncond = Assembler::notEqual;     break;
2031     case lir_cond_notEqual:     acond = Assembler::notEqual;     ncond = Assembler::equal;        break;
2032     case lir_cond_less:         acond = Assembler::less;         ncond = Assembler::greaterEqual; break;
2033     case lir_cond_lessEqual:    acond = Assembler::lessEqual;    ncond = Assembler::greater;      break;
2034     case lir_cond_greaterEqual: acond = Assembler::greaterEqual; ncond = Assembler::less;         break;
2035     case lir_cond_greater:      acond = Assembler::greater;      ncond = Assembler::lessEqual;    break;
2036     case lir_cond_belowEqual:   acond = Assembler::belowEqual;   ncond = Assembler::above;        break;
2037     case lir_cond_aboveEqual:   acond = Assembler::aboveEqual;   ncond = Assembler::below;        break;
2038     default:                    ShouldNotReachHere();
2039   }
2040 
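  // materialize opr1 into result unconditionally, then (when cmov is
  // available) conditionally overwrite it with opr2 under the negated
  // condition; this avoids a branch on the common path.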
2041   if (opr1->is_cpu_register()) {
2042     reg2reg(opr1, result);
2043   } else if (opr1->is_stack()) {
2044     stack2reg(opr1, result, result->type());
2045   } else if (opr1->is_constant()) {
2046     const2reg(opr1, result, lir_patch_none, NULL);
2047   } else {
2048     ShouldNotReachHere();
2049   }
2050 
2051   if (VM_Version::supports_cmov() && !opr2->is_constant()) {
2052     // optimized version that does not require a branch
2053     if (opr2->is_single_cpu()) {
2054       assert(opr2->cpu_regnr() != result->cpu_regnr(), "opr2 already overwritten by previous move");
2055       __ cmov(ncond, result->as_register(), opr2->as_register());
2056     } else if (opr2->is_double_cpu()) {
2057       assert(opr2->cpu_regnrLo() != result->cpu_regnrLo() && opr2->cpu_regnrLo() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
2058       assert(opr2->cpu_regnrHi() != result->cpu_regnrLo() && opr2->cpu_regnrHi() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
2059       __ cmovptr(ncond, result->as_register_lo(), opr2->as_register_lo());
2060       NOT_LP64(__ cmovptr(ncond, result->as_register_hi(), opr2->as_register_hi());)
2061     } else if (opr2->is_single_stack()) {
2062       __ cmovl(ncond, result->as_register(), frame_map()->address_for_slot(opr2->single_stack_ix()));
2063     } else if (opr2->is_double_stack()) {
2064       __ cmovptr(ncond, result->as_register_lo(), frame_map()->address_for_slot(opr2->double_stack_ix(), lo_word_offset_in_bytes));
2065       NOT_LP64(__ cmovptr(ncond, result->as_register_hi(), frame_map()->address_for_slot(opr2->double_stack_ix(), hi_word_offset_in_bytes));)
2066     } else {
2067       ShouldNotReachHere();
2068     }
2069 
2070   } else {
2071     Label skip;
2072     __ jcc (acond, skip);
2073     if (opr2->is_cpu_register()) {
2074       reg2reg(opr2, result);
2075     } else if (opr2->is_stack()) {
2076       stack2reg(opr2, result, result->type());
2077     } else if (opr2->is_constant()) {
2078       const2reg(opr2, result, lir_patch_none, NULL);
2079     } else {
2080       ShouldNotReachHere();
2081     }
2082     __ bind(skip);
2083   }
2084 }
2085 
2086 
2087 void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) {
2088   assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method");
2089 
2090   if (left->is_single_cpu()) {
2091     assert(left == dest, "left and dest must be equal");
2092     Register lreg = left->as_register();
2093 
2094     if (right->is_single_cpu()) {
2095       // cpu register - cpu register
2096       Register rreg = right->as_register();
2097       switch (code) {
2098         case lir_add: __ addl (lreg, rreg); break;
2099         case lir_sub: __ subl (lreg, rreg); break;
2100         case lir_mul: __ imull(lreg, rreg); break;
2101         default:      ShouldNotReachHere();
2102       }
2103 
2104     } else if (right->is_stack()) {
2105       // cpu register - stack
2106       Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
2107       switch (code) {
2108         case lir_add: __ addl(lreg, raddr); break;
2109         case lir_sub: __ subl(lreg, raddr); break;
2110         default:      ShouldNotReachHere();
2111       }
2112 
2113     } else if (right->is_constant()) {
2114       // cpu register - constant
2115       jint c = right->as_constant_ptr()->as_jint();
2116       switch (code) {
2117         case lir_add: {
2118           __ incrementl(lreg, c);
2119           break;
2120         }
2121         case lir_sub: {
2122           __ decrementl(lreg, c);
2123           break;
2124         }
2125         default: ShouldNotReachHere();
2126       }
2127 
2128     } else {
2129       ShouldNotReachHere();
2130     }
2131 
2132   } else if (left->is_double_cpu()) {
2133     assert(left == dest, "left and dest must be equal");
2134     Register lreg_lo = left->as_register_lo();
2135     Register lreg_hi = left->as_register_hi();
2136 
2137     if (right->is_double_cpu()) {
2138       // cpu register - cpu register
2139       Register rreg_lo = right->as_register_lo();
2140       Register rreg_hi = right->as_register_hi();
2141       NOT_LP64(assert_different_registers(lreg_lo, lreg_hi, rreg_lo, rreg_hi));
2142       LP64_ONLY(assert_different_registers(lreg_lo, rreg_lo));
2143       switch (code) {
2144         case lir_add:
2145           __ addptr(lreg_lo, rreg_lo);
2146           NOT_LP64(__ adcl(lreg_hi, rreg_hi));
2147           break;
2148         case lir_sub:
2149           __ subptr(lreg_lo, rreg_lo);
2150           NOT_LP64(__ sbbl(lreg_hi, rreg_hi));
2151           break;
2152         case lir_mul:
2153 #ifdef _LP64
2154           __ imulq(lreg_lo, rreg_lo);
2155 #else
2156           assert(lreg_lo == rax && lreg_hi == rdx, "must be");
2157           __ imull(lreg_hi, rreg_lo);
2158           __ imull(rreg_hi, lreg_lo);
2159           __ addl (rreg_hi, lreg_hi);
2160           __ mull (rreg_lo);
2161           __ addl (lreg_hi, rreg_hi);
2162 #endif // _LP64
2163           break;
2164         default:
2165           ShouldNotReachHere();
2166       }
2167 
2168     } else if (right->is_constant()) {
2169       // cpu register - constant
2170 #ifdef _LP64
2171       jlong c = right->as_constant_ptr()->as_jlong_bits();
2172       __ movptr(r10, (intptr_t) c);
2173       switch (code) {
2174         case lir_add:
2175           __ addptr(lreg_lo, r10);
2176           break;
2177         case lir_sub:
2178           __ subptr(lreg_lo, r10);
2179           break;
2180         default:
2181           ShouldNotReachHere();
2182       }
2183 #else
2184       jint c_lo = right->as_constant_ptr()->as_jint_lo();
2185       jint c_hi = right->as_constant_ptr()->as_jint_hi();
2186       switch (code) {
2187         case lir_add:
2188           __ addptr(lreg_lo, c_lo);
2189           __ adcl(lreg_hi, c_hi);
2190           break;
2191         case lir_sub:
2192           __ subptr(lreg_lo, c_lo);
2193           __ sbbl(lreg_hi, c_hi);
2194           break;
2195         default:
2196           ShouldNotReachHere();
2197       }
2198 #endif // _LP64
2199 
2200     } else {
2201       ShouldNotReachHere();
2202     }
2203 
2204   } else if (left->is_single_xmm()) {
2205     assert(left == dest, "left and dest must be equal");
2206     XMMRegister lreg = left->as_xmm_float_reg();
2207 
2208     if (right->is_single_xmm()) {
2209       XMMRegister rreg = right->as_xmm_float_reg();
2210       switch (code) {
2211         case lir_add: __ addss(lreg, rreg);  break;
2212         case lir_sub: __ subss(lreg, rreg);  break;
2213         case lir_mul_strictfp: // fall through
2214         case lir_mul: __ mulss(lreg, rreg);  break;
2215         case lir_div_strictfp: // fall through
2216         case lir_div: __ divss(lreg, rreg);  break;
2217         default: ShouldNotReachHere();
2218       }
2219     } else {
2220       Address raddr;
2221       if (right->is_single_stack()) {
2222         raddr = frame_map()->address_for_slot(right->single_stack_ix());
2223       } else if (right->is_constant()) {
2224         // hack for now
2225         raddr = __ as_Address(InternalAddress(float_constant(right->as_jfloat())));
2226       } else {
2227         ShouldNotReachHere();
2228       }
2229       switch (code) {
2230         case lir_add: __ addss(lreg, raddr);  break;
2231         case lir_sub: __ subss(lreg, raddr);  break;
2232         case lir_mul_strictfp: // fall through
2233         case lir_mul: __ mulss(lreg, raddr);  break;
2234         case lir_div_strictfp: // fall through
2235         case lir_div: __ divss(lreg, raddr);  break;
2236         default: ShouldNotReachHere();
2237       }
2238     }
2239 
2240   } else if (left->is_double_xmm()) {
2241     assert(left == dest, "left and dest must be equal");
2242 
2243     XMMRegister lreg = left->as_xmm_double_reg();
2244     if (right->is_double_xmm()) {
2245       XMMRegister rreg = right->as_xmm_double_reg();
2246       switch (code) {
2247         case lir_add: __ addsd(lreg, rreg);  break;
2248         case lir_sub: __ subsd(lreg, rreg);  break;
2249         case lir_mul_strictfp: // fall through
2250         case lir_mul: __ mulsd(lreg, rreg);  break;
2251         case lir_div_strictfp: // fall through
2252         case lir_div: __ divsd(lreg, rreg);  break;
2253         default: ShouldNotReachHere();
2254       }
2255     } else {
2256       Address raddr;
2257       if (right->is_double_stack()) {
2258         raddr = frame_map()->address_for_slot(right->double_stack_ix());
2259       } else if (right->is_constant()) {
2260         // hack for now
2261         raddr = __ as_Address(InternalAddress(double_constant(right->as_jdouble())));
2262       } else {
2263         ShouldNotReachHere();
2264       }
2265       switch (code) {
2266         case lir_add: __ addsd(lreg, raddr);  break;
2267         case lir_sub: __ subsd(lreg, raddr);  break;
2268         case lir_mul_strictfp: // fall through
2269         case lir_mul: __ mulsd(lreg, raddr);  break;
2270         case lir_div_strictfp: // fall through
2271         case lir_div: __ divsd(lreg, raddr);  break;
2272         default: ShouldNotReachHere();
2273       }
2274     }
2275 
2276   } else if (left->is_single_fpu()) {
2277     assert(dest->is_single_fpu(),  "fpu stack allocation required");
2278 
2279     if (right->is_single_fpu()) {
2280       arith_fpu_implementation(code, left->fpu_regnr(), right->fpu_regnr(), dest->fpu_regnr(), pop_fpu_stack);
2281 
2282     } else {
2283       assert(left->fpu_regnr() == 0, "left must be on TOS");
2284       assert(dest->fpu_regnr() == 0, "dest must be on TOS");
2285 
2286       Address raddr;
2287       if (right->is_single_stack()) {
2288         raddr = frame_map()->address_for_slot(right->single_stack_ix());
2289       } else if (right->is_constant()) {
2290         address const_addr = float_constant(right->as_jfloat());
2291         assert(const_addr != NULL, "incorrect float/double constant maintenance");
2292         // hack for now
2293         raddr = __ as_Address(InternalAddress(const_addr));
2294       } else {
2295         ShouldNotReachHere();
2296       }
2297 
2298       switch (code) {
2299         case lir_add: __ fadd_s(raddr); break;
2300         case lir_sub: __ fsub_s(raddr); break;
2301         case lir_mul_strictfp: // fall through
2302         case lir_mul: __ fmul_s(raddr); break;
2303         case lir_div_strictfp: // fall through
2304         case lir_div: __ fdiv_s(raddr); break;
2305         default:      ShouldNotReachHere();
2306       }
2307     }
2308 
2309   } else if (left->is_double_fpu()) {
2310     assert(dest->is_double_fpu(),  "fpu stack allocation required");
2311 
2312     if (code == lir_mul_strictfp || code == lir_div_strictfp) {
2313       // Double values require special handling for strictfp mul/div on x86
2314       __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
2315       __ fmulp(left->fpu_regnrLo() + 1);
2316     }
2317 
2318     if (right->is_double_fpu()) {
2319       arith_fpu_implementation(code, left->fpu_regnrLo(), right->fpu_regnrLo(), dest->fpu_regnrLo(), pop_fpu_stack);
2320 
2321     } else {
2322       assert(left->fpu_regnrLo() == 0, "left must be on TOS");
2323       assert(dest->fpu_regnrLo() == 0, "dest must be on TOS");
2324 
2325       Address raddr;
2326       if (right->is_double_stack()) {
2327         raddr = frame_map()->address_for_slot(right->double_stack_ix());
2328       } else if (right->is_constant()) {
2329         // hack for now
2330         raddr = __ as_Address(InternalAddress(double_constant(right->as_jdouble())));
2331       } else {
2332         ShouldNotReachHere();
2333       }
2334 
2335       switch (code) {
2336         case lir_add: __ fadd_d(raddr); break;
2337         case lir_sub: __ fsub_d(raddr); break;
2338         case lir_mul_strictfp: // fall through
2339         case lir_mul: __ fmul_d(raddr); break;
2340         case lir_div_strictfp: // fall through
2341         case lir_div: __ fdiv_d(raddr); break;
2342         default: ShouldNotReachHere();
2343       }
2344     }
2345 
2346     if (code == lir_mul_strictfp || code == lir_div_strictfp) {
2347       // Double values require special handling for strictfp mul/div on x86
2348       __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
2349       __ fmulp(dest->fpu_regnrLo() + 1);
2350     }
2351 
2352   } else if (left->is_single_stack() || left->is_address()) {
2353     assert(left == dest, "left and dest must be equal");
2354 
2355     Address laddr;
2356     if (left->is_single_stack()) {
2357       laddr = frame_map()->address_for_slot(left->single_stack_ix());
2358     } else if (left->is_address()) {
2359       laddr = as_Address(left->as_address_ptr());
2360     } else {
2361       ShouldNotReachHere();
2362     }
2363 
2364     if (right->is_single_cpu()) {
2365       Register rreg = right->as_register();
2366       switch (code) {
2367         case lir_add: __ addl(laddr, rreg); break;
2368         case lir_sub: __ subl(laddr, rreg); break;
2369         default:      ShouldNotReachHere();
2370       }
2371     } else if (right->is_constant()) {
2372       jint c = right->as_constant_ptr()->as_jint();
2373       switch (code) {
2374         case lir_add: {
2375           __ incrementl(laddr, c);
2376           break;
2377         }
2378         case lir_sub: {
2379           __ decrementl(laddr, c);
2380           break;
2381         }
2382         default: ShouldNotReachHere();
2383       }
2384     } else {
2385       ShouldNotReachHere();
2386     }
2387 
2388   } else {
2389     ShouldNotReachHere();
2390   }
2391 }
2392 
2393 void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) {
2394   assert(pop_fpu_stack  || (left_index     == dest_index || right_index     == dest_index), "invalid LIR");
2395   assert(!pop_fpu_stack || (left_index - 1 == dest_index || right_index - 1 == dest_index), "invalid LIR");
2396   assert(left_index == 0 || right_index == 0, "either must be on top of stack");
2397 
2398   bool left_is_tos = (left_index == 0);
2399   bool dest_is_tos = (dest_index == 0);
2400   int non_tos_index = (left_is_tos ? right_index : left_index);
2401 
2402   switch (code) {
2403     case lir_add:
2404       if (pop_fpu_stack)       __ faddp(non_tos_index);
2405       else if (dest_is_tos)    __ fadd (non_tos_index);
2406       else                     __ fadda(non_tos_index);
2407       break;
2408 
2409     case lir_sub:
2410       if (left_is_tos) {
2411         if (pop_fpu_stack)     __ fsubrp(non_tos_index);
2412         else if (dest_is_tos)  __ fsub  (non_tos_index);
2413         else                   __ fsubra(non_tos_index);
2414       } else {
2415         if (pop_fpu_stack)     __ fsubp (non_tos_index);
2416         else if (dest_is_tos)  __ fsubr (non_tos_index);
2417         else                   __ fsuba (non_tos_index);
2418       }
2419       break;
2420 
2421     case lir_mul_strictfp: // fall through
2422     case lir_mul:
2423       if (pop_fpu_stack)       __ fmulp(non_tos_index);
2424       else if (dest_is_tos)    __ fmul (non_tos_index);
2425       else                     __ fmula(non_tos_index);
2426       break;
2427 
2428     case lir_div_strictfp: // fall through
2429     case lir_div:
2430       if (left_is_tos) {
2431         if (pop_fpu_stack)     __ fdivrp(non_tos_index);
2432         else if (dest_is_tos)  __ fdiv  (non_tos_index);
2433         else                   __ fdivra(non_tos_index);
2434       } else {
2435         if (pop_fpu_stack)     __ fdivp (non_tos_index);
2436         else if (dest_is_tos)  __ fdivr (non_tos_index);
2437         else                   __ fdiva (non_tos_index);
2438       }
2439       break;
2440 
2441     case lir_rem:
2442       assert(left_is_tos && dest_is_tos && right_index == 1, "must be guaranteed by FPU stack allocation");
2443       __ fremr(noreg);
2444       break;
2445 
2446     default:
2447       ShouldNotReachHere();
2448   }
2449 }
2450 
2451 
2452 void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
2453   if (value->is_double_xmm()) {
2454     switch(code) {
2455       case lir_abs :
2456         {
2457           if (dest->as_xmm_double_reg() != value->as_xmm_double_reg()) {
2458             __ movdbl(dest->as_xmm_double_reg(), value->as_xmm_double_reg());
2459           }
2460           __ andpd(dest->as_xmm_double_reg(),
2461                     ExternalAddress((address)double_signmask_pool));
2462         }
2463         break;
2464 
2465       case lir_sqrt: __ sqrtsd(dest->as_xmm_double_reg(), value->as_xmm_double_reg()); break;
2466       // all other intrinsics are not available in the SSE instruction set, so FPU is used
2467       default      : ShouldNotReachHere();
2468     }
2469 
2470   } else if (value->is_double_fpu()) {
2471     assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
2472     switch(code) {
2473       case lir_log   : __ flog() ; break;
2474       case lir_log10 : __ flog10() ; break;
2475       case lir_abs   : __ fabs() ; break;
2476       case lir_sqrt  : __ fsqrt(); break;
2477       case lir_sin   :
2478         // Should consider not saving rbx if it is not necessary
2479         __ trigfunc('s', op->as_Op2()->fpu_stack_size());
2480         break;
2481       case lir_cos :
2482         // Should consider not saving rbx if it is not necessary
2483         assert(op->as_Op2()->fpu_stack_size() <= 6, "sin and cos need two free stack slots");
2484         __ trigfunc('c', op->as_Op2()->fpu_stack_size());
2485         break;
2486       case lir_tan :
2487         // Should consider not saving rbx if it is not necessary
2488         __ trigfunc('t', op->as_Op2()->fpu_stack_size());
2489         break;
2490       default      : ShouldNotReachHere();
2491     }
2492   } else {
2493     Unimplemented();
2494   }
2495 }
2496 
2497 void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
2498   // assert(left->destroys_register(), "check");
2499   if (left->is_single_cpu()) {
2500     Register reg = left->as_register();
2501     if (right->is_constant()) {
2502       int val = right->as_constant_ptr()->as_jint();
2503       switch (code) {
2504         case lir_logic_and: __ andl (reg, val); break;
2505         case lir_logic_or:  __ orl  (reg, val); break;
2506         case lir_logic_xor: __ xorl (reg, val); break;
2507         default: ShouldNotReachHere();
2508       }
2509     } else if (right->is_stack()) {
2510       // added support for stack operands
2511       Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
2512       switch (code) {
2513         case lir_logic_and: __ andl (reg, raddr); break;
2514         case lir_logic_or:  __ orl  (reg, raddr); break;
2515         case lir_logic_xor: __ xorl (reg, raddr); break;
2516         default: ShouldNotReachHere();
2517       }
2518     } else {
2519       Register rright = right->as_register();
2520       switch (code) {
2521         case lir_logic_and: __ andptr (reg, rright); break;
2522         case lir_logic_or : __ orptr  (reg, rright); break;
2523         case lir_logic_xor: __ xorptr (reg, rright); break;
2524         default: ShouldNotReachHere();
2525       }
2526     }
2527     move_regs(reg, dst->as_register());
2528   } else {
2529     Register l_lo = left->as_register_lo();
2530     Register l_hi = left->as_register_hi();
2531     if (right->is_constant()) {
2532 #ifdef _LP64
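      // x86-64 logic ops only take sign-extended 32-bit immediates, so
      // materialize the 64-bit constant in rscratch1 first.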
2533       __ mov64(rscratch1, right->as_constant_ptr()->as_jlong());
2534       switch (code) {
2535         case lir_logic_and:
2536           __ andq(l_lo, rscratch1);
2537           break;
2538         case lir_logic_or:
2539           __ orq(l_lo, rscratch1);
2540           break;
2541         case lir_logic_xor:
2542           __ xorq(l_lo, rscratch1);
2543           break;
2544         default: ShouldNotReachHere();
2545       }
2546 #else
2547       int r_lo = right->as_constant_ptr()->as_jint_lo();
2548       int r_hi = right->as_constant_ptr()->as_jint_hi();
2549       switch (code) {
2550         case lir_logic_and:
2551           __ andl(l_lo, r_lo);
2552           __ andl(l_hi, r_hi);
2553           break;
2554         case lir_logic_or:
2555           __ orl(l_lo, r_lo);
2556           __ orl(l_hi, r_hi);
2557           break;
2558         case lir_logic_xor:
2559           __ xorl(l_lo, r_lo);
2560           __ xorl(l_hi, r_hi);
2561           break;
2562         default: ShouldNotReachHere();
2563       }
2564 #endif // _LP64
2565     } else {
2566 #ifdef _LP64
2567       Register r_lo;
2568       if (right->type() == T_OBJECT || right->type() == T_ARRAY) {
2569         r_lo = right->as_register();
2570       } else {
2571         r_lo = right->as_register_lo();
2572       }
2573 #else
2574       Register r_lo = right->as_register_lo();
2575       Register r_hi = right->as_register_hi();
2576       assert(l_lo != r_hi, "overwriting registers");
2577 #endif
2578       switch (code) {
2579         case lir_logic_and:
2580           __ andptr(l_lo, r_lo);
2581           NOT_LP64(__ andptr(l_hi, r_hi);)
2582           break;
2583         case lir_logic_or:
2584           __ orptr(l_lo, r_lo);
2585           NOT_LP64(__ orptr(l_hi, r_hi);)
2586           break;
2587         case lir_logic_xor:
2588           __ xorptr(l_lo, r_lo);
2589           NOT_LP64(__ xorptr(l_hi, r_hi);)
2590           break;
2591         default: ShouldNotReachHere();
2592       }
2593     }
2594 
2595     Register dst_lo = dst->as_register_lo();
2596     Register dst_hi = dst->as_register_hi();
2597 
2598 #ifdef _LP64
2599     move_regs(l_lo, dst_lo);
2600 #else
2601     if (dst_lo == l_hi) {
2602       assert(dst_hi != l_lo, "overwriting registers");
2603       move_regs(l_hi, dst_hi);
2604       move_regs(l_lo, dst_lo);
2605     } else {
2606       assert(dst_lo != l_hi, "overwriting registers");
2607       move_regs(l_lo, dst_lo);
2608       move_regs(l_hi, dst_hi);
2609     }
2610 #endif // _LP64
2611   }
2612 }
2613 
2614 
2615 // we assume that rax and rdx can be overwritten
2616 void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {
2617 
2618   assert(left->is_single_cpu(),   "left must be register");
2619   assert(right->is_single_cpu() || right->is_constant(),  "right must be register or constant");
2620   assert(result->is_single_cpu(), "result must be register");
2621 
2622   //  assert(left->destroys_register(), "check");
2623   //  assert(right->destroys_register(), "check");
2624 
2625   Register lreg = left->as_register();
2626   Register dreg = result->as_register();
2627 
2628   if (right->is_constant()) {
2629     int divisor = right->as_constant_ptr()->as_jint();
2630     assert(divisor > 0 && is_power_of_2(divisor), "must be");
2631     if (code == lir_idiv) {
2632       assert(lreg == rax, "must be rax");
2633       assert(temp->as_register() == rdx, "tmp register must be rdx");
2634       __ cdql(); // sign extend into rdx:rax
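      // round the quotient toward zero as idiv would: for a negative
      // dividend, add divisor-1 (taken from the sign bits in rdx)
      // before the arithmetic shift.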
2635       if (divisor == 2) {
2636         __ subl(lreg, rdx);
2637       } else {
2638         __ andl(rdx, divisor - 1);
2639         __ addl(lreg, rdx);
2640       }
2641       __ sarl(lreg, log2_intptr(divisor));
2642       move_regs(lreg, dreg);
2643     } else if (code == lir_irem) {
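      // the mask keeps the sign bit plus the low-order remainder bits; a
      // negative result is converted back to a negative remainder below.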
2644       Label done;
2645       __ mov(dreg, lreg);
2646       __ andl(dreg, 0x80000000 | (divisor - 1));
2647       __ jcc(Assembler::positive, done);
2648       __ decrement(dreg);
2649       __ orl(dreg, ~(divisor - 1));
2650       __ increment(dreg);
2651       __ bind(done);
2652     } else {
2653       ShouldNotReachHere();
2654     }
2655   } else {
2656     Register rreg = right->as_register();
2657     assert(lreg == rax, "left register must be rax");
2658     assert(rreg != rdx, "right register must not be rdx");
2659     assert(temp->as_register() == rdx, "tmp register must be rdx");
2660 
2661     move_regs(lreg, rax);
2662 
2663     int idivl_offset = __ corrected_idivl(rreg);
2664     add_debug_info_for_div0(idivl_offset, info);
2665     if (code == lir_irem) {
2666       move_regs(rdx, dreg); // result is in rdx
2667     } else {
2668       move_regs(rax, dreg);
2669     }
2670   }
2671 }
2672 
2673 
2674 void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
2675   if (opr1->is_single_cpu()) {
2676     Register reg1 = opr1->as_register();
2677     if (opr2->is_single_cpu()) {
2678       // cpu register - cpu register
2679       if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
2680         __ cmpptr(reg1, opr2->as_register());
2681       } else {
2682         assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?");
2683         __ cmpl(reg1, opr2->as_register());
2684       }
2685     } else if (opr2->is_stack()) {
2686       // cpu register - stack
2687       if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
2688         __ cmpptr(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
2689       } else {
2690         __ cmpl(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
2691       }
2692     } else if (opr2->is_constant()) {
2693       // cpu register - constant
2694       LIR_Const* c = opr2->as_constant_ptr();
2695       if (c->type() == T_INT) {
2696         __ cmpl(reg1, c->as_jint());
2697       } else if (c->type() == T_OBJECT || c->type() == T_ARRAY) {
2698         // In 64-bit, an oop fits in a single register
2699         jobject o = c->as_jobject();
2700         if (o == NULL) {
2701           __ cmpptr(reg1, (int32_t)NULL_WORD);
2702         } else {
2703 #ifdef _LP64
2704           __ movoop(rscratch1, o);
2705           __ cmpptr(reg1, rscratch1);
2706 #else
2707           __ cmpoop(reg1, c->as_jobject());
2708 #endif // _LP64
2709         }
2710       } else {
2711         ShouldNotReachHere();
2712       }
2713       // cpu register - address
2714     } else if (opr2->is_address()) {
2715       if (op->info() != NULL) {
2716         add_debug_info_for_null_check_here(op->info());
2717       }
2718       __ cmpl(reg1, as_Address(opr2->as_address_ptr()));
2719     } else {
2720       ShouldNotReachHere();
2721     }
2722 
2723   } else if (opr1->is_double_cpu()) {
2724     Register xlo = opr1->as_register_lo();
2725     Register xhi = opr1->as_register_hi();
2726     if (opr2->is_double_cpu()) {
2727 #ifdef _LP64
2728       __ cmpptr(xlo, opr2->as_register_lo());
2729 #else
2730       // cpu register - cpu register
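      // no 64-bit compare on 32-bit x86: subtract with borrow so the flags
      // reflect the full 64-bit result (this clobbers opr1); equality also
      // needs the two halves ORed together.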
2731       Register ylo = opr2->as_register_lo();
2732       Register yhi = opr2->as_register_hi();
2733       __ subl(xlo, ylo);
2734       __ sbbl(xhi, yhi);
2735       if (condition == lir_cond_equal || condition == lir_cond_notEqual) {
2736         __ orl(xhi, xlo);
2737       }
2738 #endif // _LP64
2739     } else if (opr2->is_constant()) {
2740       // cpu register - constant 0
2741       assert(opr2->as_jlong() == (jlong)0, "only handles zero");
2742 #ifdef _LP64
2743       __ cmpptr(xlo, (int32_t)opr2->as_jlong());
2744 #else
2745       assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "only handles equals case");
2746       __ orl(xhi, xlo);
2747 #endif // _LP64
2748     } else {
2749       ShouldNotReachHere();
2750     }
2751 
2752   } else if (opr1->is_single_xmm()) {
2753     XMMRegister reg1 = opr1->as_xmm_float_reg();
2754     if (opr2->is_single_xmm()) {
2755       // xmm register - xmm register
2756       __ ucomiss(reg1, opr2->as_xmm_float_reg());
2757     } else if (opr2->is_stack()) {
2758       // xmm register - stack
2759       __ ucomiss(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
2760     } else if (opr2->is_constant()) {
2761       // xmm register - constant
2762       __ ucomiss(reg1, InternalAddress(float_constant(opr2->as_jfloat())));
2763     } else if (opr2->is_address()) {
2764       // xmm register - address
2765       if (op->info() != NULL) {
2766         add_debug_info_for_null_check_here(op->info());
2767       }
2768       __ ucomiss(reg1, as_Address(opr2->as_address_ptr()));
2769     } else {
2770       ShouldNotReachHere();
2771     }
2772 
2773   } else if (opr1->is_double_xmm()) {
2774     XMMRegister reg1 = opr1->as_xmm_double_reg();
2775     if (opr2->is_double_xmm()) {
2776       // xmm register - xmm register
2777       __ ucomisd(reg1, opr2->as_xmm_double_reg());
2778     } else if (opr2->is_stack()) {
2779       // xmm register - stack
2780       __ ucomisd(reg1, frame_map()->address_for_slot(opr2->double_stack_ix()));
2781     } else if (opr2->is_constant()) {
2782       // xmm register - constant
2783       __ ucomisd(reg1, InternalAddress(double_constant(opr2->as_jdouble())));
2784     } else if (opr2->is_address()) {
2785       // xmm register - address
2786       if (op->info() != NULL) {
2787         add_debug_info_for_null_check_here(op->info());
2788       }
2789       __ ucomisd(reg1, as_Address(opr2->as_address_ptr()));
2790     } else {
2791       ShouldNotReachHere();
2792     }
2793 
2794   } else if (opr1->is_single_fpu() || opr1->is_double_fpu()) {
2795     assert(opr1->is_fpu_register() && opr1->fpu() == 0, "currently left-hand side must be on TOS (relax this restriction)");
2796     assert(opr2->is_fpu_register(), "both must be registers");
2797     __ fcmp(noreg, opr2->fpu(), op->fpu_pop_count() > 0, op->fpu_pop_count() > 1);
2798 
2799   } else if (opr1->is_address() && opr2->is_constant()) {
2800     LIR_Const* c = opr2->as_constant_ptr();
2801 #ifdef _LP64
2802     if (c->type() == T_OBJECT || c->type() == T_ARRAY) {
2803       assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "need to reverse");
2804       __ movoop(rscratch1, c->as_jobject());
2805     }
2806 #endif // LP64
2807     if (op->info() != NULL) {
2808       add_debug_info_for_null_check_here(op->info());
2809     }
2810     // special case: address - constant
2811     LIR_Address* addr = opr1->as_address_ptr();
2812     if (c->type() == T_INT) {
2813       __ cmpl(as_Address(addr), c->as_jint());
2814     } else if (c->type() == T_OBJECT || c->type() == T_ARRAY) {
2815 #ifdef _LP64
2816       // %%% Make this explode if addr isn't reachable until we figure out a
2817       // better strategy by giving noreg as the temp for as_Address
2818       __ cmpptr(rscratch1, as_Address(addr, noreg));
2819 #else
2820       __ cmpoop(as_Address(addr), c->as_jobject());
2821 #endif // _LP64
2822     } else {
2823       ShouldNotReachHere();
2824     }
2825 
2826   } else {
2827     ShouldNotReachHere();
2828   }
2829 }
2830 
2831 void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) {
2832   if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
2833     if (left->is_single_xmm()) {
2834       assert(right->is_single_xmm(), "must match");
2835       __ cmpss2int(left->as_xmm_float_reg(), right->as_xmm_float_reg(), dst->as_register(), code == lir_ucmp_fd2i);
2836     } else if (left->is_double_xmm()) {
2837       assert(right->is_double_xmm(), "must match");
2838       __ cmpsd2int(left->as_xmm_double_reg(), right->as_xmm_double_reg(), dst->as_register(), code == lir_ucmp_fd2i);
2839 
2840     } else {
2841       assert(left->is_single_fpu() || left->is_double_fpu(), "must be");
2842       assert(right->is_single_fpu() || right->is_double_fpu(), "must match");
2843 
2844       assert(left->fpu() == 0, "left must be on TOS");
2845       __ fcmp2int(dst->as_register(), code == lir_ucmp_fd2i, right->fpu(),
2846                   op->fpu_pop_count() > 0, op->fpu_pop_count() > 1);
2847     }
2848   } else {
2849     assert(code == lir_cmp_l2i, "check");
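    // produce -1, 0 or 1 in dst according to the signed comparison of the
    // two longs.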
2850 #ifdef _LP64
2851     Label done;
2852     Register dest = dst->as_register();
2853     __ cmpptr(left->as_register_lo(), right->as_register_lo());
2854     __ movl(dest, -1);
2855     __ jccb(Assembler::less, done);
2856     __ set_byte_if_not_zero(dest);
2857     __ movzbl(dest, dest);
2858     __ bind(done);
2859 #else
2860     __ lcmp2int(left->as_register_hi(),
2861                 left->as_register_lo(),
2862                 right->as_register_hi(),
2863                 right->as_register_lo());
2864     move_regs(left->as_register_hi(), dst->as_register());
2865 #endif // _LP64
2866   }
2867 }
2868 
2869 
2870 void LIR_Assembler::align_call(LIR_Code code) {
2871   if (os::is_MP()) {
2872     // make sure that the displacement word of the call ends up word aligned
2873     int offset = __ offset();
2874     switch (code) {
2875       case lir_static_call:
2876       case lir_optvirtual_call:
2877       case lir_dynamic_call:
2878         offset += NativeCall::displacement_offset;
2879         break;
2880       case lir_icvirtual_call:
2881         offset += NativeCall::displacement_offset + NativeMovConstReg::instruction_size;
2882         break;
2883       case lir_virtual_call:  // currently, sparc-specific for niagara
2884       default: ShouldNotReachHere();
2885     }
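         // Pad with nops until the displacement starts on a word boundary, e.g.
         // on LP64 a displacement that would start at offset 21 gets three nops
         // so that it starts at offset 24.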
2886     while (offset++ % BytesPerWord != 0) {
2887       __ nop();
2888     }
2889   }
2890 }
2891 
2892 
2893 void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
2894   assert(!os::is_MP() || (__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0,
2895          "must be aligned");
2896   __ call(AddressLiteral(op->addr(), rtype));
2897   add_call_info(code_offset(), op->info());
2898 }
2899 
2900 
2901 void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
2902   RelocationHolder rh = virtual_call_Relocation::spec(pc());
2903   __ movoop(IC_Klass, (jobject)Universe::non_oop_word());
2904   assert(!os::is_MP() ||
2905          (__ offset() + NativeCall::displacement_offset) % BytesPerWord == 0,
2906          "must be aligned");
2907   __ call(AddressLiteral(op->addr(), rh));
2908   add_call_info(code_offset(), op->info());
2909 }
2910 
2911 
2912 // Currently, vtable dispatch is only enabled for sparc platforms
2913 void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
2914   ShouldNotReachHere();
2915 }
2916 
2917 
2918 void LIR_Assembler::emit_static_call_stub() {
2919   address call_pc = __ pc();
2920   address stub = __ start_a_stub(call_stub_size);
2921   if (stub == NULL) {
2922     bailout("static call stub overflow");
2923     return;
2924   }
2925 
2926   int start = __ offset();
2927   if (os::is_MP()) {
2928     // make sure that the displacement word of the call ends up word aligned
2929     int offset = __ offset() + NativeMovConstReg::instruction_size + NativeCall::displacement_offset;
2930     while (offset++ % BytesPerWord != 0) {
2931       __ nop();
2932     }
2933   }
2934   __ relocate(static_stub_Relocation::spec(call_pc));
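       // The NULL oop is a placeholder; the resolved method oop is patched into
       // the following movoop when the call site is bound.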
2935   __ movoop(rbx, (jobject)NULL);
2936   // must be set to -1 at code generation time
2937   assert(!os::is_MP() || ((__ offset() + 1) % BytesPerWord) == 0, "must be aligned on MP");
2938   // On 64-bit this would die since it would take a movq & jmp; it must be only a jmp
2939   __ jump(RuntimeAddress(__ pc()));
2940 
2941   assert(__ offset() - start <= call_stub_size, "stub too big");
2942   __ end_a_stub();
2943 }
2944 
2945 
2946 void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
2947   assert(exceptionOop->as_register() == rax, "must match");
2948   assert(exceptionPC->as_register() == rdx, "must match");
2949 
2950   // exception object is not added to oop map by LinearScan
2951   // (LinearScan assumes that no oops are in fixed registers)
2952   info->add_register_oop(exceptionOop);
2953   Runtime1::StubID unwind_id;
2954 
2955   // get current pc information
2956   // The pc is only needed if the method has an exception handler; the unwind code does not need it.
2957   int pc_for_athrow_offset = __ offset();
2958   InternalAddress pc_for_athrow(__ pc());
2959   __ lea(exceptionPC->as_register(), pc_for_athrow);
2960   add_call_info(pc_for_athrow_offset, info); // for exception handler
2961 
2962   __ verify_not_null_oop(rax);
2963   // search an exception handler (rax: exception oop, rdx: throwing pc)
2964   if (compilation()->has_fpu_code()) {
2965     unwind_id = Runtime1::handle_exception_id;
2966   } else {
2967     unwind_id = Runtime1::handle_exception_nofpu_id;
2968   }
2969   __ call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
2970 
2971   // enough room for a two-byte trap
2972   __ nop();
2973 }
2974 
2975 
2976 void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
2977   assert(exceptionOop->as_register() == rax, "must match");
2978 
2979   __ jmp(_unwind_handler_entry);
2980 }
2981 
2982 
2983 void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
2984 
2985   // optimized version for linear scan:
2986   // * count must already be in ECX (guaranteed by LinearScan)
2987   // * left and dest must be equal
2988   // * tmp must be unused
2989   assert(count->as_register() == SHIFT_count, "count must be in ECX");
2990   assert(left == dest, "left and dest must be equal");
2991   assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
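       // x86 variable-count shifts take their count only in CL, which is why
       // LinearScan pins the count operand to rcx.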
2992 
2993   if (left->is_single_cpu()) {
2994     Register value = left->as_register();
2995     assert(value != SHIFT_count, "left cannot be ECX");
2996 
2997     switch (code) {
2998       case lir_shl:  __ shll(value); break;
2999       case lir_shr:  __ sarl(value); break;
3000       case lir_ushr: __ shrl(value); break;
3001       default: ShouldNotReachHere();
3002     }
3003   } else if (left->is_double_cpu()) {
3004     Register lo = left->as_register_lo();
3005     Register hi = left->as_register_hi();
3006     assert(lo != SHIFT_count && hi != SHIFT_count, "left cannot be ECX");
3007 #ifdef _LP64
3008     switch (code) {
3009       case lir_shl:  __ shlptr(lo);        break;
3010       case lir_shr:  __ sarptr(lo);        break;
3011       case lir_ushr: __ shrptr(lo);        break;
3012       default: ShouldNotReachHere();
3013     }
3014 #else
3015 
3016     switch (code) {
3017       case lir_shl:  __ lshl(hi, lo);        break;
3018       case lir_shr:  __ lshr(hi, lo, true);  break;
3019       case lir_ushr: __ lshr(hi, lo, false); break;
3020       default: ShouldNotReachHere();
3021     }
3022 #endif // _LP64
3023   } else {
3024     ShouldNotReachHere();
3025   }
3026 }
3027 
3028 
3029 void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
3030   if (dest->is_single_cpu()) {
3031     // first move left into dest so that left is not destroyed by the shift
3032     Register value = dest->as_register();
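         // JLS 15.19: only the low five bits of the shift distance are used for int shifts.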
3033     count = count & 0x1F; // Java spec
3034 
3035     move_regs(left->as_register(), value);
3036     switch (code) {
3037       case lir_shl:  __ shll(value, count); break;
3038       case lir_shr:  __ sarl(value, count); break;
3039       case lir_ushr: __ shrl(value, count); break;
3040       default: ShouldNotReachHere();
3041     }
3042   } else if (dest->is_double_cpu()) {
3043 #ifndef _LP64
3044     Unimplemented();
3045 #else
3046     // first move left into dest so that left is not destroyed by the shift
3047     Register value = dest->as_register_lo();
3048     count = count & 0x1F; // Java spec
3049 
3050     move_regs(left->as_register_lo(), value);
3051     switch (code) {
3052       case lir_shl:  __ shlptr(value, count); break;
3053       case lir_shr:  __ sarptr(value, count); break;
3054       case lir_ushr: __ shrptr(value, count); break;
3055       default: ShouldNotReachHere();
3056     }
3057 #endif // _LP64
3058   } else {
3059     ShouldNotReachHere();
3060   }
3061 }
3062 
3063 
3064 void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) {
3065   assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
3066   int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
3067   assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
3068   __ movptr (Address(rsp, offset_from_rsp_in_bytes), r);
3069 }
3070 
3071 
3072 void LIR_Assembler::store_parameter(jint c,     int offset_from_rsp_in_words) {
3073   assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
3074   int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
3075   assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
3076   __ movptr (Address(rsp, offset_from_rsp_in_bytes), c);
3077 }
3078 
3079 
3080 void LIR_Assembler::store_parameter(jobject o,  int offset_from_rsp_in_words) {
3081   assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
3082   int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
3083   assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
3084   __ movoop (Address(rsp, offset_from_rsp_in_bytes), o);
3085 }
3086 
3087 
3088 // This code replaces a call to arraycopy; no exceptions may
3089 // be thrown in this code; they must be thrown in the System.arraycopy
3090 // activation frame. We could save some checks if that were not the case.
3091 void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
3092   ciArrayKlass* default_type = op->expected_type();
3093   Register src = op->src()->as_register();
3094   Register dst = op->dst()->as_register();
3095   Register src_pos = op->src_pos()->as_register();
3096   Register dst_pos = op->dst_pos()->as_register();
3097   Register length  = op->length()->as_register();
3098   Register tmp = op->tmp()->as_register();
3099 
3100   CodeStub* stub = op->stub();
3101   int flags = op->flags();
3102   BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
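       // An array of arrays is copied as an array of objects.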
3103   if (basic_type == T_ARRAY) basic_type = T_OBJECT;
3104 
3105   // if we don't know anything, just go through the generic arraycopy
3106   if (default_type == NULL) {
3107     Label done;
3108     // save outgoing arguments on stack in case call to System.arraycopy is needed
3109     // HACK ALERT. This code used to push the parameters in a hardwired fashion
3110     // for the interpreter calling conventions. Now we have to do it in the new-style
3111     // conventions. For the moment, until C1 gets the new register allocator, we just
3112     // force all the args to the right place (except the register args) and then on
3113     // the back side reload the register args properly if we take the slow path. Yuck
3114 
3115     // These are proper for the calling convention
3116     store_parameter(length, 2);
3117     store_parameter(dst_pos, 1);
3118     store_parameter(dst, 0);
3119 
3120     // these are just temporary placements until we need to reload
3121     store_parameter(src_pos, 3);
3122     store_parameter(src, 4);
3123     NOT_LP64(assert(src == rcx && src_pos == rdx, "mismatch in calling convention");)
3124 
3125     address C_entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
3126 
3127     address copyfunc_addr = StubRoutines::generic_arraycopy();
3128 
3129     // pass arguments: may push as this is not a safepoint; SP must be fixed at each safepoint
3130 #ifdef _LP64
3131     // The arguments are in the java calling convention, so we can trivially
3132     // shift them to the C convention
3133     assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4);
3134     __ mov(c_rarg0, j_rarg0);
3135     assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4);
3136     __ mov(c_rarg1, j_rarg1);
3137     assert_different_registers(c_rarg2, j_rarg3, j_rarg4);
3138     __ mov(c_rarg2, j_rarg2);
3139     assert_different_registers(c_rarg3, j_rarg4);
3140     __ mov(c_rarg3, j_rarg3);
3141 #ifdef _WIN64
3142     // Allocate abi space for args but be sure to keep stack aligned
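         // The first 32 bytes are the Win64 register-parameter shadow area; the
         // fifth argument goes in the slot just above it (word index 4).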
3143     __ subptr(rsp, 6*wordSize);
3144     store_parameter(j_rarg4, 4);
3145     if (copyfunc_addr == NULL) { // Use C version if stub was not generated
3146       __ call(RuntimeAddress(C_entry));
3147     } else {
3148 #ifndef PRODUCT
3149       if (PrintC1Statistics) {
3150         __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3151       }
3152 #endif
3153       __ call(RuntimeAddress(copyfunc_addr));
3154     }
3155     __ addptr(rsp, 6*wordSize);
3156 #else
3157     __ mov(c_rarg4, j_rarg4);
3158     if (copyfunc_addr == NULL) { // Use C version if stub was not generated
3159       __ call(RuntimeAddress(C_entry));
3160     } else {
3161 #ifndef PRODUCT
3162       if (PrintC1Statistics) {
3163         __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3164       }
3165 #endif
3166       __ call(RuntimeAddress(copyfunc_addr));
3167     }
3168 #endif // _WIN64
3169 #else
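         // 32-bit: pass all five arguments on the stack; the last one pushed
         // becomes the first C argument (src, src_pos, dst, dst_pos, length).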
3170     __ push(length);
3171     __ push(dst_pos);
3172     __ push(dst);
3173     __ push(src_pos);
3174     __ push(src);
3175 
3176     if (copyfunc_addr == NULL) { // Use C version if stub was not generated
3177       __ call_VM_leaf(C_entry, 5); // removes pushed parameter from the stack
3178     } else {
3179 #ifndef PRODUCT
3180       if (PrintC1Statistics) {
3181         __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
3182       }
3183 #endif
3184       __ call_VM_leaf(copyfunc_addr, 5); // removes pushed parameter from the stack
3185     }
3186 
3187 #endif // _LP64
3188 
3189     __ cmpl(rax, 0);
3190     __ jcc(Assembler::equal, *stub->continuation());
3191 
3192     if (copyfunc_addr != NULL) {
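           // When the stub copies only a prefix, it returns the one's complement
           // of the number of elements already copied in rax; recover that count
           // into tmp.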
3193       __ mov(tmp, rax);
3194       __ xorl(tmp, -1);
3195     }
3196 
3197     // Reload values from the stack so they are where the stub
3198     // expects them.
3199     __ movptr   (dst,     Address(rsp, 0*BytesPerWord));
3200     __ movptr   (dst_pos, Address(rsp, 1*BytesPerWord));
3201     __ movptr   (length,  Address(rsp, 2*BytesPerWord));
3202     __ movptr   (src_pos, Address(rsp, 3*BytesPerWord));
3203     __ movptr   (src,     Address(rsp, 4*BytesPerWord));
3204 
3205     if (copyfunc_addr != NULL) {
3206       __ subl(length, tmp);
3207       __ addl(src_pos, tmp);
3208       __ addl(dst_pos, tmp);
3209     }
3210     __ jmp(*stub->entry());
3211 
3212     __ bind(*stub->continuation());
3213     return;
3214   }
3215 
3216   assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point");
3217 
3218   int elem_size = type2aelembytes(basic_type);
3219   int shift_amount;
3220   Address::ScaleFactor scale;
3221 
3222   switch (elem_size) {
3223     case 1 :
3224       shift_amount = 0;
3225       scale = Address::times_1;
3226       break;
3227     case 2 :
3228       shift_amount = 1;
3229       scale = Address::times_2;
3230       break;
3231     case 4 :
3232       shift_amount = 2;
3233       scale = Address::times_4;
3234       break;
3235     case 8 :
3236       shift_amount = 3;
3237       scale = Address::times_8;
3238       break;
3239     default:
3240       ShouldNotReachHere();
3241   }
3242 
3243   Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes());
3244   Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes());
3245   Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes());
3246   Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes());
3247 
3248   // length and the pos values are all sign-extended at this point on 64-bit
3249 
3250   // test for NULL
3251   if (flags & LIR_OpArrayCopy::src_null_check) {
3252     __ testptr(src, src);
3253     __ jcc(Assembler::zero, *stub->entry());
3254   }
3255   if (flags & LIR_OpArrayCopy::dst_null_check) {
3256     __ testptr(dst, dst);
3257     __ jcc(Assembler::zero, *stub->entry());
3258   }
3259 
3260   // check if negative
3261   if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
3262     __ testl(src_pos, src_pos);
3263     __ jcc(Assembler::less, *stub->entry());
3264   }
3265   if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
3266     __ testl(dst_pos, dst_pos);
3267     __ jcc(Assembler::less, *stub->entry());
3268   }
3269 
3270   if (flags & LIR_OpArrayCopy::src_range_check) {
3271     __ lea(tmp, Address(src_pos, length, Address::times_1, 0));
3272     __ cmpl(tmp, src_length_addr);
3273     __ jcc(Assembler::above, *stub->entry());
3274   }
3275   if (flags & LIR_OpArrayCopy::dst_range_check) {
3276     __ lea(tmp, Address(dst_pos, length, Address::times_1, 0));
3277     __ cmpl(tmp, dst_length_addr);
3278     __ jcc(Assembler::above, *stub->entry());
3279   }
3280 
3281   if (flags & LIR_OpArrayCopy::length_positive_check) {
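         // A negative length takes the slow path; a zero length means there is
         // nothing left to copy and the operation is complete.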
3282     __ testl(length, length);
3283     __ jcc(Assembler::less, *stub->entry());
3284     __ jcc(Assembler::zero, *stub->continuation());
3285   }
3286 
3287 #ifdef _LP64
3288   __ movl2ptr(src_pos, src_pos); // upper 32 bits must be zero
3289   __ movl2ptr(dst_pos, dst_pos); // upper 32 bits must be zero
3290 #endif
3291 
3292   if (flags & LIR_OpArrayCopy::type_check) {
3293     // We don't know whether the array types are compatible
3294     if (basic_type != T_OBJECT) {
3295       // Simple test for basic type arrays
3296       if (UseCompressedOops) {
3297         __ movl(tmp, src_klass_addr);
3298         __ cmpl(tmp, dst_klass_addr);
3299       } else {
3300         __ movptr(tmp, src_klass_addr);
3301         __ cmpptr(tmp, dst_klass_addr);
3302       }
3303       __ jcc(Assembler::notEqual, *stub->entry());
3304     } else {
3305       // For object arrays, if src is a sub class of dst then we can
3306       // safely do the copy.
3307       Label cont, slow;
3308 
3309       __ push(src);
3310       __ push(dst);
3311 
3312       __ load_klass(src, src);
3313       __ load_klass(dst, dst);
3314 
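           // The fast path branches to cont when src is a subtype of dst and to
           // slow on definite failure; it falls through when the stub-based
           // subtype check below is required.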
3315       __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL);
3316 
3317       __ push(src);
3318       __ push(dst);
3319       __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
3320       __ pop(dst);
3321       __ pop(src);
3322 
3323       __ cmpl(src, 0);
3324       __ jcc(Assembler::notEqual, cont);
3325 
3326       __ bind(slow);
3327       __ pop(dst);
3328       __ pop(src);
3329 
3330       address copyfunc_addr = StubRoutines::checkcast_arraycopy();
3331       if (copyfunc_addr != NULL) { // use stub if available
3332         // src is not a sub class of dst so we have to do a
3333         // per-element check.
3334 
3335         int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
3336         if ((flags & mask) != mask) {
3337           // Check at runtime that the array whose type is not statically known is an object array.
3338           assert(flags & mask, "one of the two should be known to be an object array");
3339 
3340           if (!(flags & LIR_OpArrayCopy::src_objarray)) {
3341             __ load_klass(tmp, src);
3342           } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
3343             __ load_klass(tmp, dst);
3344           }
3345           int lh_offset = klassOopDesc::header_size() * HeapWordSize +
3346             Klass::layout_helper_offset_in_bytes();
3347           Address klass_lh_addr(tmp, lh_offset);
3348           jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
3349           __ cmpl(klass_lh_addr, objArray_lh);
3350           __ jcc(Assembler::notEqual, *stub->entry());
3351         }
3352 
3353         // Spill because stubs can use any register they like and it's
3354         // easier to restore just those that we care about.
3355         store_parameter(dst, 0);
3356         store_parameter(dst_pos, 1);
3357         store_parameter(length, 2);
3358         store_parameter(src_pos, 3);
3359         store_parameter(src, 4);
3360 
3361 #ifndef _LP64
3362         __ movptr(tmp, dst_klass_addr);
3363         __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
3364         __ push(tmp);
3365         __ movl(tmp, Address(tmp, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
3366         __ push(tmp);
3367         __ push(length);
3368         __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3369         __ push(tmp);
3370         __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3371         __ push(tmp);
3372 
3373         __ call_VM_leaf(copyfunc_addr, 5);
3374 #else
3375         __ movl2ptr(length, length); // upper 32 bits must be zero
3376 
3377         __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3378         assert_different_registers(c_rarg0, dst, dst_pos, length);
3379         __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3380         assert_different_registers(c_rarg1, dst, length);
3381 
3382         __ mov(c_rarg2, length);
3383         assert_different_registers(c_rarg2, dst);
3384 
3385 #ifdef _WIN64
3386         // Allocate abi space for args but be sure to keep stack aligned
3387         __ subptr(rsp, 6*wordSize);
3388         __ load_klass(c_rarg3, dst);
3389         __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
3390         store_parameter(c_rarg3, 4);
3391         __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
3392         __ call(RuntimeAddress(copyfunc_addr));
3393         __ addptr(rsp, 6*wordSize);
3394 #else
3395         __ load_klass(c_rarg4, dst);
3396         __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
3397         __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc)));
3398         __ call(RuntimeAddress(copyfunc_addr));
3399 #endif // _WIN64
3400 
3401 #endif // _LP64
3402 
3403 #ifndef PRODUCT
3404         if (PrintC1Statistics) {
3405           Label failed;
3406           __ testl(rax, rax);
3407           __ jcc(Assembler::notZero, failed);
3408           __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt));
3409           __ bind(failed);
3410         }
3411 #endif
3412 
3413         __ testl(rax, rax);
3414         __ jcc(Assembler::zero, *stub->continuation());
3415 
3416 #ifndef PRODUCT
3417         if (PrintC1Statistics) {
3418           __ incrementl(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt));
3419         }
3420 #endif
3421 
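             // As above, rax holds the one's complement of the number of elements
             // copied before the failing element; recover the count into tmp so
             // the positions and length can be adjusted below.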
3422         __ mov(tmp, rax);
3423 
3424         __ xorl(tmp, -1);
3425 
3426         // Restore previously spilled arguments
3427         __ movptr   (dst,     Address(rsp, 0*BytesPerWord));
3428         __ movptr   (dst_pos, Address(rsp, 1*BytesPerWord));
3429         __ movptr   (length,  Address(rsp, 2*BytesPerWord));
3430         __ movptr   (src_pos, Address(rsp, 3*BytesPerWord));
3431         __ movptr   (src,     Address(rsp, 4*BytesPerWord));
3432 
3433 
3434         __ subl(length, tmp);
3435         __ addl(src_pos, tmp);
3436         __ addl(dst_pos, tmp);
3437       }
3438 
3439       __ jmp(*stub->entry());
3440 
3441       __ bind(cont);
3442       __ pop(dst);
3443       __ pop(src);
3444     }
3445   }
3446 
3447 #ifdef ASSERT
3448   if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
3449     // Sanity check the known type with the incoming class.  For the
3450     // primitive case the types must match exactly, with src.klass and
3451     // dst.klass each exactly matching the default type.  For the
3452     // object array case, if no type check is needed then either the
3453     // dst type is exactly the expected type and the src type is a
3454     // subtype which we can't check, or src is the same array as dst
3455     // but not necessarily exactly of type default_type.
3456     Label known_ok, halt;
3457     __ movoop(tmp, default_type->constant_encoding());
3458 #ifdef _LP64
3459     if (UseCompressedOops) {
3460       __ encode_heap_oop(tmp);
3461     }
3462 #endif
3463 
3464     if (basic_type != T_OBJECT) {
3465 
3466       if (UseCompressedOops) __ cmpl(tmp, dst_klass_addr);
3467       else                   __ cmpptr(tmp, dst_klass_addr);
3468       __ jcc(Assembler::notEqual, halt);
3469       if (UseCompressedOops) __ cmpl(tmp, src_klass_addr);
3470       else                   __ cmpptr(tmp, src_klass_addr);
3471       __ jcc(Assembler::equal, known_ok);
3472     } else {
3473       if (UseCompressedOops) __ cmpl(tmp, dst_klass_addr);
3474       else                   __ cmpptr(tmp, dst_klass_addr);
3475       __ jcc(Assembler::equal, known_ok);
3476       __ cmpptr(src, dst);
3477       __ jcc(Assembler::equal, known_ok);
3478     }
3479     __ bind(halt);
3480     __ stop("incorrect type information in arraycopy");
3481     __ bind(known_ok);
3482   }
3483 #endif
3484 
3485 #ifndef PRODUCT
3486   if (PrintC1Statistics) {
3487     __ incrementl(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)));
3488   }
3489 #endif
3490 
3491 #ifdef _LP64
3492   assert_different_registers(c_rarg0, dst, dst_pos, length);
3493   __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3494   assert_different_registers(c_rarg1, length);
3495   __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3496   __ mov(c_rarg2, length);
3497 
3498 #else
3499   __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3500   store_parameter(tmp, 0);
3501   __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type)));
3502   store_parameter(tmp, 1);
3503   store_parameter(length, 2);
3504 #endif // _LP64
3505 
3506   bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
3507   bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
3508   const char *name;
3509   address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
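       // The arguments are already in place, so no additional arguments are set up here.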
3510   __ call_VM_leaf(entry, 0);
3511 
3512   __ bind(*stub->continuation());
3513 }
3514 
3515 
3516 void LIR_Assembler::emit_lock(LIR_OpLock* op) {
3517   Register obj = op->obj_opr()->as_register();  // may not be an oop
3518   Register hdr = op->hdr_opr()->as_register();
3519   Register lock = op->lock_opr()->as_register();
3520   if (!UseFastLocking) {
3521     __ jmp(*op->stub()->entry());
3522   } else if (op->code() == lir_lock) {
3523     Register scratch = noreg;
3524     if (UseBiasedLocking) {
3525       scratch = op->scratch_opr()->as_register();
3526     }
3527     assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
3528     // add debug info for NullPointerException only if one is possible
3529     int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry());
3530     if (op->info() != NULL) {
3531       add_debug_info_for_null_check(null_check_offset, op->info());
3532     }
3533     // done
3534   } else if (op->code() == lir_unlock) {
3535     assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
3536     __ unlock_object(hdr, obj, lock, *op->stub()->entry());
3537   } else {
3538     Unimplemented();
3539   }
3540   __ bind(*op->stub()->continuation());
3541 }
3542 
3543 
3544 void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
3545   ciMethod* method = op->profiled_method();
3546   int bci          = op->profiled_bci();
3547 
3548   // Update counter for all call types
3549   ciMethodData* md = method->method_data_or_null();
3550   assert(md != NULL, "Sanity");
3551   ciProfileData* data = md->bci_to_data(bci);
3552   assert(data->is_CounterData(), "need CounterData for calls");
3553   assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
3554   Register mdo  = op->mdo()->as_register();
3555   __ movoop(mdo, md->constant_encoding());
3556   Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
3557   Bytecodes::Code bc = method->java_code_at_bci(bci);
3558   // Perform additional virtual call profiling for invokevirtual and
3559   // invokeinterface bytecodes
3560   if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
3561       C1ProfileVirtualCalls) {
3562     assert(op->recv()->is_single_cpu(), "recv must be allocated");
3563     Register recv = op->recv()->as_register();
3564     assert_different_registers(mdo, recv);
3565     assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
3566     ciKlass* known_klass = op->known_holder();
3567     if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
3568       // We know the type that will be seen at this call site; we can
3569       // statically update the methodDataOop rather than needing to do
3570       // dynamic tests on the receiver type
3571 
3572       // NOTE: we should probably put a lock around this search to
3573       // avoid collisions from concurrent compilations
3574       ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
3575       uint i;
3576       for (i = 0; i < VirtualCallData::row_limit(); i++) {
3577         ciKlass* receiver = vc_data->receiver(i);
3578         if (known_klass->equals(receiver)) {
3579           Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
3580           __ addptr(data_addr, DataLayout::counter_increment);
3581           return;
3582         }
3583       }
3584 
3585       // Receiver type not found in profile data; select an empty slot
3586 
3587       // Note that this is less efficient than it should be because it
3588       // always does a write to the receiver part of the
3589       // VirtualCallData rather than doing so only on the first execution
3590       for (i = 0; i < VirtualCallData::row_limit(); i++) {
3591         ciKlass* receiver = vc_data->receiver(i);
3592         if (receiver == NULL) {
3593           Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
3594           __ movoop(recv_addr, known_klass->constant_encoding());
3595           Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
3596           __ addptr(data_addr, DataLayout::counter_increment);
3597           return;
3598         }
3599       }
3600     } else {
3601       __ load_klass(recv, recv);
3602       Label update_done;
3603       type_profile_helper(mdo, md, data, recv, &update_done);
3604       // Receiver did not match any saved receiver and there is no empty row for it.
3605       // Increment total counter to indicate polymorphic case.
3606       __ addptr(counter_addr, DataLayout::counter_increment);
3607 
3608       __ bind(update_done);
3609     }
3610   } else {
3611     // Static call
3612     __ addptr(counter_addr, DataLayout::counter_increment);
3613   }
3614 }
3615 
3616 void LIR_Assembler::emit_delay(LIR_OpDelay*) {
3617   Unimplemented();
3618 }
3619 
3620 
3621 void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
3622   __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
3623 }
3624 
3625 
3626 void LIR_Assembler::align_backward_branch_target() {
3627   __ align(BytesPerWord);
3628 }
3629 
3630 
3631 void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
3632   if (left->is_single_cpu()) {
3633     __ negl(left->as_register());
3634     move_regs(left->as_register(), dest->as_register());
3635 
3636   } else if (left->is_double_cpu()) {
3637     Register lo = left->as_register_lo();
3638 #ifdef _LP64
3639     Register dst = dest->as_register_lo();
3640     __ movptr(dst, lo);
3641     __ negptr(dst);
3642 #else
3643     Register hi = left->as_register_hi();
3644     __ lneg(hi, lo);
3645     if (dest->as_register_lo() == hi) {
3646       assert(dest->as_register_hi() != lo, "destroying register");
3647       move_regs(hi, dest->as_register_hi());
3648       move_regs(lo, dest->as_register_lo());
3649     } else {
3650       move_regs(lo, dest->as_register_lo());
3651       move_regs(hi, dest->as_register_hi());
3652     }
3653 #endif // _LP64
3654 
3655   } else if (dest->is_single_xmm()) {
3656     if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
3657       __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
3658     }
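         // xor with the sign-flip mask toggles only the IEEE sign bit, which
         // negates the value exactly, with no rounding.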
3659     __ xorps(dest->as_xmm_float_reg(),
3660              ExternalAddress((address)float_signflip_pool));
3661 
3662   } else if (dest->is_double_xmm()) {
3663     if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
3664       __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
3665     }
3666     __ xorpd(dest->as_xmm_double_reg(),
3667              ExternalAddress((address)double_signflip_pool));
3668 
3669   } else if (left->is_single_fpu() || left->is_double_fpu()) {
3670     assert(left->fpu() == 0, "arg must be on TOS");
3671     assert(dest->fpu() == 0, "dest must be TOS");
3672     __ fchs();
3673 
3674   } else {
3675     ShouldNotReachHere();
3676   }
3677 }
3678 
3679 
3680 void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) {
3681   assert(addr->is_address() && dest->is_register(), "check");
3682   Register reg = dest->as_pointer_register();
3684   __ lea(reg, as_Address(addr->as_address_ptr()));
3685 }
3686 
3687 
3688 
3689 void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
3690   assert(!tmp->is_valid(), "don't need temporary");
3691   __ call(RuntimeAddress(dest));
3692   if (info != NULL) {
3693     add_call_info_here(info);
3694   }
3695 }
3696 
3697 
3698 void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
3699   assert(type == T_LONG, "only for volatile long fields");
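       // A volatile long must be moved atomically; a single 64-bit XMM (or FPU)
       // move provides that even on 32-bit x86.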
3700 
3701   if (info != NULL) {
3702     add_debug_info_for_null_check_here(info);
3703   }
3704 
3705   if (src->is_double_xmm()) {
3706     if (dest->is_double_cpu()) {
3707 #ifdef _LP64
3708       __ movdq(dest->as_register_lo(), src->as_xmm_double_reg());
3709 #else
3710       __ movdl(dest->as_register_lo(), src->as_xmm_double_reg());
3711       __ psrlq(src->as_xmm_double_reg(), 32);
3712       __ movdl(dest->as_register_hi(), src->as_xmm_double_reg());
3713 #endif // _LP64
3714     } else if (dest->is_double_stack()) {
3715       __ movdbl(frame_map()->address_for_slot(dest->double_stack_ix()), src->as_xmm_double_reg());
3716     } else if (dest->is_address()) {
3717       __ movdbl(as_Address(dest->as_address_ptr()), src->as_xmm_double_reg());
3718     } else {
3719       ShouldNotReachHere();
3720     }
3721 
3722   } else if (dest->is_double_xmm()) {
3723     if (src->is_double_stack()) {
3724       __ movdbl(dest->as_xmm_double_reg(), frame_map()->address_for_slot(src->double_stack_ix()));
3725     } else if (src->is_address()) {
3726       __ movdbl(dest->as_xmm_double_reg(), as_Address(src->as_address_ptr()));
3727     } else {
3728       ShouldNotReachHere();
3729     }
3730 
3731   } else if (src->is_double_fpu()) {
3732     assert(src->fpu_regnrLo() == 0, "must be TOS");
3733     if (dest->is_double_stack()) {
3734       __ fistp_d(frame_map()->address_for_slot(dest->double_stack_ix()));
3735     } else if (dest->is_address()) {
3736       __ fistp_d(as_Address(dest->as_address_ptr()));
3737     } else {
3738       ShouldNotReachHere();
3739     }
3740 
3741   } else if (dest->is_double_fpu()) {
3742     assert(dest->fpu_regnrLo() == 0, "must be TOS");
3743     if (src->is_double_stack()) {
3744       __ fild_d(frame_map()->address_for_slot(src->double_stack_ix()));
3745     } else if (src->is_address()) {
3746       __ fild_d(as_Address(src->as_address_ptr()));
3747     } else {
3748       ShouldNotReachHere();
3749     }
3750   } else {
3751     ShouldNotReachHere();
3752   }
3753 }
3754 
3755 
3756 void LIR_Assembler::membar() {
3757   // QQQ sparc TSO uses this.
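       // x86 is also TSO: StoreLoad is the only reordering that must be prevented
       // by an explicit fence.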
3758   __ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad));
3759 }
3760 
3761 void LIR_Assembler::membar_acquire() {
3762   // No x86 machines currently require load fences
3763   // __ load_fence();
3764 }
3765 
3766 void LIR_Assembler::membar_release() {
3767   // No x86 machines currently require store fences
3768   // __ store_fence();
3769 }
3770 
3771 void LIR_Assembler::get_thread(LIR_Opr result_reg) {
3772   assert(result_reg->is_register(), "check");
3773 #ifdef _LP64
3774   // __ get_thread(result_reg->as_register_lo());
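       // On x86_64, r15 is reserved for the current JavaThread throughout compiled code.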
3775   __ mov(result_reg->as_register(), r15_thread);
3776 #else
3777   __ get_thread(result_reg->as_register());
3778 #endif // _LP64
3779 }
3780 
3781 
3782 void LIR_Assembler::peephole(LIR_List*) {
3783   // do nothing for now
3784 }
3785 
3786 
3787 #undef __