1 /*
   2  * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "c1/c1_Compilation.hpp"
  27 #include "c1/c1_LIRAssembler.hpp"
  28 #include "c1/c1_MacroAssembler.hpp"
  29 #include "c1/c1_Runtime1.hpp"
  30 #include "c1/c1_ValueStack.hpp"
  31 #include "ci/ciArrayKlass.hpp"
  32 #include "ci/ciInstance.hpp"
  33 #include "gc_interface/collectedHeap.hpp"
  34 #include "memory/barrierSet.hpp"
  35 #include "memory/cardTableModRefBS.hpp"
  36 #include "nativeInst_sparc.hpp"
  37 #include "oops/objArrayKlass.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 
  40 #define __ _masm->
  41 
  42 
  43 //------------------------------------------------------------
  44 
  45 
  46 bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
  47   if (opr->is_constant()) {
  48     LIR_Const* constant = opr->as_constant_ptr();
  49     switch (constant->type()) {
  50       case T_INT: {
  51         jint value = constant->as_jint();
  52         return Assembler::is_simm13(value);
  53       }
  54 
  55       default:
  56         return false;
  57     }
  58   }
  59   return false;
  60 }
  61 
  62 
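     // Answers whether 'op' will expand to exactly one SPARC instruction; the
     // delay-slot peephole relies on this when deciding which ops may be moved
     // into a branch or call delay slot (a delay slot holds a single instruction).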
  63 bool LIR_Assembler::is_single_instruction(LIR_Op* op) {
  64   switch (op->code()) {
  65     case lir_null_check:
  66     return true;
  67 
  68 
  69     case lir_add:
  70     case lir_ushr:
  71     case lir_shr:
  72     case lir_shl:
  73       // integer shifts and adds are always one instruction
  74       return op->result_opr()->is_single_cpu();
  75 
  76 
  77     case lir_move: {
  78       LIR_Op1* op1 = op->as_Op1();
  79       LIR_Opr src = op1->in_opr();
  80       LIR_Opr dst = op1->result_opr();
  81 
  82       if (src == dst) {
  83         NEEDS_CLEANUP;
  84         // This works around a problem where a move with the same src and dst
  85         // ends up in the delay slot; the assembler then swallows the mov since
  86         // it has no effect and complains because the delay slot is empty.
  87         // Returning false keeps the optimizer from putting this move into
  88         // the delay slot.
  89         return false;
  90       }
  91 
  92       // don't put moves involving oops into the delay slot since the VerifyOops code
  93       // will make it much larger than a single instruction.
  94       if (VerifyOops) {
  95         return false;
  96       }
  97 
  98       if (src->is_double_cpu() || dst->is_double_cpu() || op1->patch_code() != lir_patch_none ||
  99           ((src->is_double_fpu() || dst->is_double_fpu()) && op1->move_kind() != lir_move_normal)) {
 100         return false;
 101       }
 102 
 103       if (UseCompressedOops) {
 104         if (dst->is_address() && !dst->is_stack() && (dst->type() == T_OBJECT || dst->type() == T_ARRAY)) return false;
 105         if (src->is_address() && !src->is_stack() && (src->type() == T_OBJECT || src->type() == T_ARRAY)) return false;
 106       }
 107 
 108       if (UseCompressedKlassPointers) {
 109         if (src->is_address() && !src->is_stack() && src->type() == T_ADDRESS &&
 110             src->as_address_ptr()->disp() == oopDesc::klass_offset_in_bytes()) return false;
 111       }
 112 
 113       if (dst->is_register()) {
 114         if (src->is_address() && Assembler::is_simm13(src->as_address_ptr()->disp())) {
 115           return !PatchALot;
 116         } else if (src->is_single_stack()) {
 117           return true;
 118         }
 119       }
 120 
 121       if (src->is_register()) {
 122         if (dst->is_address() && Assembler::is_simm13(dst->as_address_ptr()->disp())) {
 123           return !PatchALot;
 124         } else if (dst->is_single_stack()) {
 125           return true;
 126         }
 127       }
 128 
 129       if (dst->is_register() &&
 130           ((src->is_register() && src->is_single_word() && src->is_same_type(dst)) ||
 131            (src->is_constant() && LIR_Assembler::is_small_constant(op->as_Op1()->in_opr())))) {
 132         return true;
 133       }
 134 
 135       return false;
 136     }
 137 
 138     default:
 139       return false;
 140   }
 141   ShouldNotReachHere();
 142 }
 143 
 144 
 145 LIR_Opr LIR_Assembler::receiverOpr() {
 146   return FrameMap::O0_oop_opr;
 147 }
 148 
 149 
 150 LIR_Opr LIR_Assembler::osrBufferPointer() {
 151   return FrameMap::I0_opr;
 152 }
 153 
 154 
 155 int LIR_Assembler::initial_frame_size_in_bytes() {
 156   return in_bytes(frame_map()->framesize_in_bytes());
 157 }
 158 
 159 
 160 // Inline cache check: the inline cache holds the expected class in G5_inline_cache_reg (G5);
 161 // we fetch the class of the receiver (O0) and compare it with the cached class.
 162 // If they do not match we jump to the slow case.
 163 int LIR_Assembler::check_icache() {
 164   int offset = __ offset();
 165   __ inline_cache_check(O0, G5_inline_cache_reg);
 166   return offset;
 167 }
 168 
 169 
 170 void LIR_Assembler::osr_entry() {
 171   // On-stack-replacement entry sequence (interpreter frame layout described in interpreter_sparc.cpp):
 172   //
 173   //   1. Create a new compiled activation.
 174   //   2. Initialize local variables in the compiled activation.  The expression stack must be empty
 175   //      at the osr_bci; it is not initialized.
 176   //   3. Jump to the continuation address in compiled code to resume execution.
 177 
 178   // OSR entry point
 179   offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
 180   BlockBegin* osr_entry = compilation()->hir()->osr_entry();
 181   ValueStack* entry_state = osr_entry->end()->state();
 182   int number_of_locks = entry_state->locks_size();
 183 
 184   // Create a frame for the compiled activation.
 185   __ build_frame(initial_frame_size_in_bytes());
 186 
 187   // OSR buffer is
 188   //
 189   // locals[nlocals-1..0]
 190   // monitors[number_of_locks-1..0]
 191   //
 192   // locals is a direct copy of the interpreter frame, so in the osr buffer
 193   // the first slot in the locals array is the last local from the interpreter
 194   // and the last slot is local[0] (the receiver) from the interpreter.
 195   //
 196   // Similarly with locks: the first lock slot in the osr buffer is the nth lock
 197   // from the interpreter frame, and the nth lock slot in the osr buffer is the
 198   // 0th lock in the interpreter frame (the method lock for a synchronized method).
 199 
 200   // Initialize monitors in the compiled activation.
 201   //   I0: pointer to osr buffer
 202   //
 203   // All other registers are dead at this point and the locals will be
 204   // copied into place by code emitted in the IR.
 205 
 206   Register OSR_buf = osrBufferPointer()->as_register();
 207   { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
 208     int monitor_offset = BytesPerWord * method()->max_locals() +
 209       (2 * BytesPerWord) * (number_of_locks - 1);
 210     // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
 211     // the OSR buffer using 2 word entries: first the lock and then
 212     // the oop.
 213     for (int i = 0; i < number_of_locks; i++) {
 214       int slot_offset = monitor_offset - ((i * 2) * BytesPerWord);
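           // Each 2-word OSR buffer entry holds the displaced header (BasicLock) at
           // slot_offset and the locked object at slot_offset + BytesPerWord.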
 215 #ifdef ASSERT
 216       // verify the interpreter's monitor has a non-null object
 217       {
 218         Label L;
 219         __ ld_ptr(OSR_buf, slot_offset + 1*BytesPerWord, O7);
 220         __ cmp_and_br_short(O7, G0, Assembler::notEqual, Assembler::pt, L);
 221         __ stop("locked object is NULL");
 222         __ bind(L);
 223       }
 224 #endif // ASSERT
 225       // Copy the lock field into the compiled activation.
 226       __ ld_ptr(OSR_buf, slot_offset + 0, O7);
 227       __ st_ptr(O7, frame_map()->address_for_monitor_lock(i));
 228       __ ld_ptr(OSR_buf, slot_offset + 1*BytesPerWord, O7);
 229       __ st_ptr(O7, frame_map()->address_for_monitor_object(i));
 230     }
 231   }
 232 }
 233 
 234 
 235 // Optimized Library calls
 236 // This is the fast version of java.lang.String.compare; it has no
 237 // OSR entry and therefore we generate a slow version for OSR compilations.
 238 void LIR_Assembler::emit_string_compare(LIR_Opr left, LIR_Opr right, LIR_Opr dst, CodeEmitInfo* info) {
 239   Register str0 = left->as_register();
 240   Register str1 = right->as_register();
 241 
 242   Label Ldone;
 243 
 244   Register result = dst->as_register();
 245   {
 246     // Get a pointer to the first character of string0 in tmp0
 247     //   and get string0.length() in str0
 248     // Get a pointer to the first character of string1 in tmp1
 249     //   and get string1.length() in str1
 250     // Also, get string0.length()-string1.length() in
 251     //   O7 and get the condition codes set
 252     // Note: some instructions have been hoisted for better instruction scheduling
 253 
 254     Register tmp0 = L0;
 255     Register tmp1 = L1;
 256     Register tmp2 = L2;
 257 
 258     int  value_offset = java_lang_String:: value_offset_in_bytes(); // char array
 259     if (java_lang_String::has_offset_field()) {
 260       int offset_offset = java_lang_String::offset_offset_in_bytes(); // first character position
 261       int  count_offset = java_lang_String:: count_offset_in_bytes();
 262       __ load_heap_oop(str0, value_offset, tmp0);
 263       __ ld(str0, offset_offset, tmp2);
 264       __ add(tmp0, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp0);
 265       __ ld(str0, count_offset, str0);
 266       __ sll(tmp2, exact_log2(sizeof(jchar)), tmp2);
 267     } else {
 268       __ load_heap_oop(str0, value_offset, tmp1);
 269       __ add(tmp1, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp0);
 270       __ ld(tmp1, arrayOopDesc::length_offset_in_bytes(), str0);
 271     }
 272 
 273     // str1 may be null
 274     add_debug_info_for_null_check_here(info);
 275 
 276     if (java_lang_String::has_offset_field()) {
 277       int offset_offset = java_lang_String::offset_offset_in_bytes(); // first character position
 278       int  count_offset = java_lang_String:: count_offset_in_bytes();
 279       __ load_heap_oop(str1, value_offset, tmp1);
 280       __ add(tmp0, tmp2, tmp0);
 281 
 282       __ ld(str1, offset_offset, tmp2);
 283       __ add(tmp1, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1);
 284       __ ld(str1, count_offset, str1);
 285       __ sll(tmp2, exact_log2(sizeof(jchar)), tmp2);
 286       __ add(tmp1, tmp2, tmp1);
 287     } else {
 288       __ load_heap_oop(str1, value_offset, tmp2);
 289       __ add(tmp2, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1);
 290       __ ld(tmp2, arrayOopDesc::length_offset_in_bytes(), str1);
 291     }
 292     __ subcc(str0, str1, O7);
 293   }
 294 
 295   {
 296     // Compute the minimum of the string lengths, scale it and store it in limit
 297     Register count0 = I0;
 298     Register count1 = I1;
 299     Register limit  = L3;
 300 
 301     Label Lskip;
 302     __ sll(count0, exact_log2(sizeof(jchar)), limit);             // string0 is shorter
 303     __ br(Assembler::greater, true, Assembler::pt, Lskip);
 304     __ delayed()->sll(count1, exact_log2(sizeof(jchar)), limit);  // string1 is shorter
 305     __ bind(Lskip);
 306 
 307     // If either string is empty (or both of them) the result is the difference in lengths
 308     __ cmp(limit, 0);
 309     __ br(Assembler::equal, true, Assembler::pn, Ldone);
 310     __ delayed()->mov(O7, result);  // result is difference in lengths
 311   }
 312 
 313   {
 314     // Neither string is empty
 315     Label Lloop;
 316 
 317     Register base0 = L0;
 318     Register base1 = L1;
 319     Register chr0  = I0;
 320     Register chr1  = I1;
 321     Register limit = L3;
 322 
 323     // Shift base0 and base1 to the end of the arrays, negate limit
 324     __ add(base0, limit, base0);
 325     __ add(base1, limit, base1);
 326     __ neg(limit);  // limit = -min{string0.length(), string1.length()}
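         // With base0/base1 advanced past the last character to be compared and limit
         // negative, (base + limit) walks forward through both strings and limit reaches
         // zero exactly at the end of the shorter one; the annulled delay-slot loads
         // below fetch the next character of string0 only when the loop is taken again.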
 327 
 328     __ lduh(base0, limit, chr0);
 329     __ bind(Lloop);
 330     __ lduh(base1, limit, chr1);
 331     __ subcc(chr0, chr1, chr0);
 332     __ br(Assembler::notZero, false, Assembler::pn, Ldone);
 333     assert(chr0 == result, "result must be pre-placed");
 334     __ delayed()->inccc(limit, sizeof(jchar));
 335     __ br(Assembler::notZero, true, Assembler::pt, Lloop);
 336     __ delayed()->lduh(base0, limit, chr0);
 337   }
 338 
 339   // If strings are equal up to min length, return the length difference.
 340   __ mov(O7, result);
 341 
 342   // Otherwise, return the difference between the first mismatched chars.
 343   __ bind(Ldone);
 344 }
 345 
 346 
 347 // --------------------------------------------------------------------------------------------
 348 
 349 void LIR_Assembler::monitorexit(LIR_Opr obj_opr, LIR_Opr lock_opr, Register hdr, int monitor_no) {
 350   if (!GenerateSynchronizationCode) return;
 351 
 352   Register obj_reg = obj_opr->as_register();
 353   Register lock_reg = lock_opr->as_register();
 354 
 355   Address mon_addr = frame_map()->address_for_monitor_lock(monitor_no);
 356   Register reg = mon_addr.base();
 357   int offset = mon_addr.disp();
 358   // compute pointer to BasicLock
 359   if (mon_addr.is_simm13()) {
 360     __ add(reg, offset, lock_reg);
 361   }
 362   else {
 363     __ set(offset, lock_reg);
 364     __ add(reg, lock_reg, lock_reg);
 365   }
 366   // unlock object
 367   MonitorAccessStub* slow_case = new MonitorExitStub(lock_opr, UseFastLocking, monitor_no);
 368   // temporary fix: the stub must be created after the exception handler,
 369   // therefore it is appended to _slow_case_stubs and emitted as a call stub
 370   _slow_case_stubs->append(slow_case);
 371   if (UseFastLocking) {
 372     // try inlined fast unlocking first, revert to slow unlocking if it fails
 373     // note: lock_reg points to the displaced header since the displaced header offset is 0!
 374     assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
 375     __ unlock_object(hdr, obj_reg, lock_reg, *slow_case->entry());
 376   } else {
 377     // always do slow unlocking
 378     // note: the slow unlocking code could be inlined here, however if we use
 379     //       slow unlocking, speed doesn't matter anyway and this solution is
 380     //       simpler and requires less duplicated code - additionally, the
 381     //       slow unlocking code is the same in either case which simplifies
 382     //       debugging
 383     __ br(Assembler::always, false, Assembler::pt, *slow_case->entry());
 384     __ delayed()->nop();
 385   }
 386   // done
 387   __ bind(*slow_case->continuation());
 388 }
 389 
 390 
 391 int LIR_Assembler::emit_exception_handler() {
 392   // if the last instruction is a call (typically to do a throw which
 393   // is coming at the end after block reordering) the return address
 394   // must still point into the code area in order to avoid assertion
 395   // failures when searching for the corresponding bci => add a nop
 396   // (was bug 5/14/1999 - gri)
 397   __ nop();
 398 
 399   // generate code for exception handler
 400   ciMethod* method = compilation()->method();
 401 
 402   address handler_base = __ start_a_stub(exception_handler_size);
 403 
 404   if (handler_base == NULL) {
 405     // not enough space left for the handler
 406     bailout("exception handler overflow");
 407     return -1;
 408   }
 409 
 410   int offset = code_offset();
 411 
 412   __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type);
 413   __ delayed()->nop();
 414   __ should_not_reach_here();
 415   guarantee(code_offset() - offset <= exception_handler_size, "overflow");
 416   __ end_a_stub();
 417 
 418   return offset;
 419 }
 420 
 421 
 422 // Emit the code to remove the frame from the stack in the exception
 423 // unwind path.
 424 int LIR_Assembler::emit_unwind_handler() {
 425 #ifndef PRODUCT
 426   if (CommentedAssembly) {
 427     _masm->block_comment("Unwind handler");
 428   }
 429 #endif
 430 
 431   int offset = code_offset();
 432 
 433   // Fetch the exception from TLS and clear out exception related thread state
 434   __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), O0);
 435   __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
 436   __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
 437 
 438   __ bind(_unwind_handler_entry);
 439   __ verify_not_null_oop(O0);
 440   if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
 441     __ mov(O0, I0);  // Preserve the exception
 442   }
 443 
 444   // Perform needed unlocking
 445   MonitorExitStub* stub = NULL;
 446   if (method()->is_synchronized()) {
 447     monitor_address(0, FrameMap::I1_opr);
 448     stub = new MonitorExitStub(FrameMap::I1_opr, true, 0);
 449     __ unlock_object(I3, I2, I1, *stub->entry());
 450     __ bind(*stub->continuation());
 451   }
 452 
 453   if (compilation()->env()->dtrace_method_probes()) {
 454     __ mov(G2_thread, O0);
 455     __ save_thread(I1); // need to preserve thread in G2 across
 456                         // runtime call
 457     metadata2reg(method()->constant_encoding(), O1);
 458     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), relocInfo::runtime_call_type);
 459     __ delayed()->nop();
 460     __ restore_thread(I1);
 461   }
 462 
 463   if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
 464     __ mov(I0, O0);  // Restore the exception
 465   }
 466 
 467   // dispatch to the unwind logic
 468   __ call(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type);
 469   __ delayed()->nop();
 470 
 471   // Emit the slow path assembly
 472   if (stub != NULL) {
 473     stub->emit_code(this);
 474   }
 475 
 476   return offset;
 477 }
 478 
 479 
 480 int LIR_Assembler::emit_deopt_handler() {
 481   // if the last instruction is a call (typically to do a throw which
 482   // is coming at the end after block reordering) the return address
 483   // must still point into the code area in order to avoid assertion
 484   // failures when searching for the corresponding bci => add a nop
 485   // (was bug 5/14/1999 - gri)
 486   __ nop();
 487 
 488   // generate code for deopt handler
 489   ciMethod* method = compilation()->method();
 490   address handler_base = __ start_a_stub(deopt_handler_size);
 491   if (handler_base == NULL) {
 492     // not enough space left for the handler
 493     bailout("deopt handler overflow");
 494     return -1;
 495   }
 496 
 497   int offset = code_offset();
 498   AddressLiteral deopt_blob(SharedRuntime::deopt_blob()->unpack());
 499   __ JUMP(deopt_blob, G3_scratch, 0); // sethi;jmp
 500   __ delayed()->nop();
 501   guarantee(code_offset() - offset <= deopt_handler_size, "overflow");
 502   __ end_a_stub();
 503 
 504   return offset;
 505 }
 506 
 507 
 508 void LIR_Assembler::jobject2reg(jobject o, Register reg) {
 509   if (o == NULL) {
 510     __ set(NULL_WORD, reg);
 511   } else {
 512     int oop_index = __ oop_recorder()->find_index(o);
 513     assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(o)), "should be real oop");
 514     RelocationHolder rspec = oop_Relocation::spec(oop_index);
 515     __ set(NULL_WORD, reg, rspec); // Will be set when the nmethod is created
 516   }
 517 }
 518 
 519 
 520 void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
 521   // Allocate a new index in table to hold the object once it's been patched
 522   int oop_index = __ oop_recorder()->allocate_oop_index(NULL);
 523   PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_mirror_id, oop_index);
 524 
 525   AddressLiteral addrlit(NULL, oop_Relocation::spec(oop_index));
 526   assert(addrlit.rspec().type() == relocInfo::oop_type, "must be an oop reloc");
 527   // It may not seem necessary to use a sethi/add pair to load a NULL into dest, but the
 528   // NULL will be dynamically patched later and the patched value may be large.  We must
 529   // therefore generate the sethi/add as placeholders.
 530   __ patchable_set(addrlit, reg);
 531 
 532   patching_epilog(patch, lir_patch_normal, reg, info);
 533 }
 534 
 535 
 536 void LIR_Assembler::metadata2reg(Metadata* o, Register reg) {
 537   __ set_metadata_constant(o, reg);
 538 }
 539 
 540 void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo *info) {
 541   // Allocate a new index in table to hold the klass once it's been patched
 542   int index = __ oop_recorder()->allocate_metadata_index(NULL);
 543   PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id, index);
 544   AddressLiteral addrlit(NULL, metadata_Relocation::spec(index));
 545   assert(addrlit.rspec().type() == relocInfo::metadata_type, "must be a metadata reloc");
 546   // It may not seem necessary to use a sethi/add pair to load a NULL into dest, but the
 547   // NULL will be dynamically patched later and the patched value may be large.  We must
 548   // therefore generate the sethi/add as placeholders.
 549   __ patchable_set(addrlit, reg);
 550 
 551   patching_epilog(patch, lir_patch_normal, reg, info);
 552 }
 553 
 554 void LIR_Assembler::emit_op3(LIR_Op3* op) {
 555   Register Rdividend = op->in_opr1()->as_register();
 556   Register Rdivisor  = noreg;
 557   Register Rscratch  = op->in_opr3()->as_register();
 558   Register Rresult   = op->result_opr()->as_register();
 559   int divisor = -1;
 560 
 561   if (op->in_opr2()->is_register()) {
 562     Rdivisor = op->in_opr2()->as_register();
 563   } else {
 564     divisor = op->in_opr2()->as_constant_ptr()->as_jint();
 565     assert(Assembler::is_simm13(divisor), "can only handle simm13");
 566   }
 567 
 568   assert(Rdividend != Rscratch, "");
 569   assert(Rdivisor  != Rscratch, "");
 570   assert(op->code() == lir_idiv || op->code() == lir_irem, "Must be irem or idiv");
 571 
 572   if (Rdivisor == noreg && is_power_of_2(divisor)) {
 573     // convert division by a power of two into some shifts and logical operations
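         // For divisor == 2^k this computes  x / 2^k  as  (x + ((x >> 31) & (2^k - 1))) >> k:
         // a negative dividend is biased by divisor-1 first so that the arithmetic shift
         // rounds the quotient toward zero, matching Java division semantics.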
 574     if (op->code() == lir_idiv) {
 575       if (divisor == 2) {
 576         __ srl(Rdividend, 31, Rscratch);
 577       } else {
 578         __ sra(Rdividend, 31, Rscratch);
 579         __ and3(Rscratch, divisor - 1, Rscratch);
 580       }
 581       __ add(Rdividend, Rscratch, Rscratch);
 582       __ sra(Rscratch, log2_intptr(divisor), Rresult);
 583       return;
 584     } else {
 585       if (divisor == 2) {
 586         __ srl(Rdividend, 31, Rscratch);
 587       } else {
 588         __ sra(Rdividend, 31, Rscratch);
 589         __ and3(Rscratch, divisor - 1,Rscratch);
 590       }
 591       __ add(Rdividend, Rscratch, Rscratch);
 592       __ andn(Rscratch, divisor - 1,Rscratch);
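           // Rscratch now holds the dividend rounded toward zero to a multiple of the
           // divisor, so the subtraction below yields the Java remainder (which has the
           // same sign as the dividend).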
 593       __ sub(Rdividend, Rscratch, Rresult);
 594       return;
 595     }
 596   }
 597 
 598   __ sra(Rdividend, 31, Rscratch);
 599   __ wry(Rscratch);
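       // sdivcc divides the 64-bit value Y:Rdividend, so Y must first be loaded with
       // the sign extension of the 32-bit dividend.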
 600   if (!VM_Version::v9_instructions_work()) {
 601     // v9 doesn't require these nops
 602     __ nop();
 603     __ nop();
 604     __ nop();
 605     __ nop();
 606   }
 607 
 608   add_debug_info_for_div0_here(op->info());
 609 
 610   if (Rdivisor != noreg) {
 611     __ sdivcc(Rdividend, Rdivisor, (op->code() == lir_idiv ? Rresult : Rscratch));
 612   } else {
 613     assert(Assembler::is_simm13(divisor), "can only handle simm13");
 614     __ sdivcc(Rdividend, divisor, (op->code() == lir_idiv ? Rresult : Rscratch));
 615   }
 616 
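       // sdivcc sets the overflow flag only for min_jint / -1; the sethi in the annulled
       // delay slot executes only when that branch is taken and overwrites the bogus
       // quotient with min_jint (0x80000000), the correct Java result for that case.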
 617   Label skip;
 618   __ br(Assembler::overflowSet, true, Assembler::pn, skip);
 619   __ delayed()->Assembler::sethi(0x80000000, (op->code() == lir_idiv ? Rresult : Rscratch));
 620   __ bind(skip);
 621 
 622   if (op->code() == lir_irem) {
 623     if (Rdivisor != noreg) {
 624       __ smul(Rscratch, Rdivisor, Rscratch);
 625     } else {
 626       __ smul(Rscratch, divisor, Rscratch);
 627     }
 628     __ sub(Rdividend, Rscratch, Rresult);
 629   }
 630 }
 631 
 632 
 633 void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
 634 #ifdef ASSERT
 635   assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
 636   if (op->block() != NULL)  _branch_target_blocks.append(op->block());
 637   if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
 638 #endif
 639   assert(op->info() == NULL, "shouldn't have CodeEmitInfo");
 640 
 641   if (op->cond() == lir_cond_always) {
 642     __ br(Assembler::always, false, Assembler::pt, *(op->label()));
 643   } else if (op->code() == lir_cond_float_branch) {
 644     assert(op->ublock() != NULL, "must have unordered successor");
 645     bool is_unordered = (op->ublock() == op->block());
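         // When the unordered successor is the same block as the taken branch target,
         // fold the unordered (NaN) outcome into the branch by selecting the
         // f_unorderedOr* variant of the condition below.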
 646     Assembler::Condition acond;
 647     switch (op->cond()) {
 648       case lir_cond_equal:         acond = Assembler::f_equal;    break;
 649       case lir_cond_notEqual:      acond = Assembler::f_notEqual; break;
 650       case lir_cond_less:          acond = (is_unordered ? Assembler::f_unorderedOrLess          : Assembler::f_less);           break;
 651       case lir_cond_greater:       acond = (is_unordered ? Assembler::f_unorderedOrGreater       : Assembler::f_greater);        break;
 652       case lir_cond_lessEqual:     acond = (is_unordered ? Assembler::f_unorderedOrLessOrEqual   : Assembler::f_lessOrEqual);    break;
 653       case lir_cond_greaterEqual:  acond = (is_unordered ? Assembler::f_unorderedOrGreaterOrEqual: Assembler::f_greaterOrEqual); break;
 654       default :                         ShouldNotReachHere();
 655     };
 656 
 657     if (!VM_Version::v9_instructions_work()) {
 658       __ nop();
 659     }
 660     __ fb( acond, false, Assembler::pn, *(op->label()));
 661   } else {
 662     assert (op->code() == lir_branch, "just checking");
 663 
 664     Assembler::Condition acond;
 665     switch (op->cond()) {
 666       case lir_cond_equal:        acond = Assembler::equal;                break;
 667       case lir_cond_notEqual:     acond = Assembler::notEqual;             break;
 668       case lir_cond_less:         acond = Assembler::less;                 break;
 669       case lir_cond_lessEqual:    acond = Assembler::lessEqual;            break;
 670       case lir_cond_greaterEqual: acond = Assembler::greaterEqual;         break;
 671       case lir_cond_greater:      acond = Assembler::greater;              break;
 672       case lir_cond_aboveEqual:   acond = Assembler::greaterEqualUnsigned; break;
 673       case lir_cond_belowEqual:   acond = Assembler::lessEqualUnsigned;    break;
 674       default:                         ShouldNotReachHere();
 675     };
 676 
 677     // sparc has different condition codes for testing 32-bit
 678     // vs. 64-bit values.  We could always test xcc if we could
 679     // guarantee that 32-bit loads were always sign extended, but that isn't
 680     // true, and since sign extension isn't free it would impose a
 681     // slight cost.
 682 #ifdef _LP64
 683     if  (op->type() == T_INT) {
 684       __ br(acond, false, Assembler::pn, *(op->label()));
 685     } else
 686 #endif
 687       __ brx(acond, false, Assembler::pn, *(op->label()));
 688   }
 689   // The peephole pass fills the delay slot
 690 }
 691 
 692 
 693 void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
 694   Bytecodes::Code code = op->bytecode();
 695   LIR_Opr dst = op->result_opr();
 696 
 697   switch(code) {
 698     case Bytecodes::_i2l: {
 699       Register rlo  = dst->as_register_lo();
 700       Register rhi  = dst->as_register_hi();
 701       Register rval = op->in_opr()->as_register();
 702 #ifdef _LP64
 703       __ sra(rval, 0, rlo);
 704 #else
 705       __ mov(rval, rlo);
 706       __ sra(rval, BitsPerInt-1, rhi);
 707 #endif
 708       break;
 709     }
 710     case Bytecodes::_i2d:
 711     case Bytecodes::_i2f: {
 712       bool is_double = (code == Bytecodes::_i2d);
 713       FloatRegister rdst = is_double ? dst->as_double_reg() : dst->as_float_reg();
 714       FloatRegisterImpl::Width w = is_double ? FloatRegisterImpl::D : FloatRegisterImpl::S;
 715       FloatRegister rsrc = op->in_opr()->as_float_reg();
 716       if (rsrc != rdst) {
 717         __ fmov(FloatRegisterImpl::S, rsrc, rdst);
 718       }
 719       __ fitof(w, rdst, rdst);
 720       break;
 721     }
 722     case Bytecodes::_f2i:{
 723       FloatRegister rsrc = op->in_opr()->as_float_reg();
 724       Address       addr = frame_map()->address_for_slot(dst->single_stack_ix());
 725       Label L;
 726       // result must be 0 if value is NaN; test by comparing value to itself
 727       __ fcmp(FloatRegisterImpl::S, Assembler::fcc0, rsrc, rsrc);
 728       if (!VM_Version::v9_instructions_work()) {
 729         __ nop();
 730       }
 731       __ fb(Assembler::f_unordered, true, Assembler::pn, L);
 732       __ delayed()->st(G0, addr); // annulled if the contents of rsrc are not NaN
 733       __ ftoi(FloatRegisterImpl::S, rsrc, rsrc);
 734       // store the integer result from the float register into the destination stack slot
 735       __ stf(FloatRegisterImpl::S, rsrc, addr.base(), addr.disp());
 736       __ bind (L);
 737       break;
 738     }
 739     case Bytecodes::_l2i: {
 740       Register rlo  = op->in_opr()->as_register_lo();
 741       Register rhi  = op->in_opr()->as_register_hi();
 742       Register rdst = dst->as_register();
 743 #ifdef _LP64
 744       __ sra(rlo, 0, rdst);
 745 #else
 746       __ mov(rlo, rdst);
 747 #endif
 748       break;
 749     }
 750     case Bytecodes::_d2f:
 751     case Bytecodes::_f2d: {
 752       bool is_double = (code == Bytecodes::_f2d);
 753       assert((!is_double && dst->is_single_fpu()) || (is_double && dst->is_double_fpu()), "check");
 754       LIR_Opr val = op->in_opr();
 755       FloatRegister rval = (code == Bytecodes::_d2f) ? val->as_double_reg() : val->as_float_reg();
 756       FloatRegister rdst = is_double ? dst->as_double_reg() : dst->as_float_reg();
 757       FloatRegisterImpl::Width vw = is_double ? FloatRegisterImpl::S : FloatRegisterImpl::D;
 758       FloatRegisterImpl::Width dw = is_double ? FloatRegisterImpl::D : FloatRegisterImpl::S;
 759       __ ftof(vw, dw, rval, rdst);
 760       break;
 761     }
 762     case Bytecodes::_i2s:
 763     case Bytecodes::_i2b: {
 764       Register rval = op->in_opr()->as_register();
 765       Register rdst = dst->as_register();
 766       int shift = (code == Bytecodes::_i2b) ? (BitsPerInt - T_BYTE_aelem_bytes * BitsPerByte) : (BitsPerInt - BitsPerShort);
 767       __ sll (rval, shift, rdst);
 768       __ sra (rdst, shift, rdst);
 769       break;
 770     }
 771     case Bytecodes::_i2c: {
 772       Register rval = op->in_opr()->as_register();
 773       Register rdst = dst->as_register();
 774       int shift = BitsPerInt - T_CHAR_aelem_bytes * BitsPerByte;
 775       __ sll (rval, shift, rdst);
 776       __ srl (rdst, shift, rdst);
 777       break;
 778     }
 779 
 780     default: ShouldNotReachHere();
 781   }
 782 }
 783 
 784 
 785 void LIR_Assembler::align_call(LIR_Code) {
 786   // do nothing since all instructions are word aligned on sparc
 787 }
 788 
 789 
 790 void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
 791   __ call(op->addr(), rtype);
 792   // The peephole pass fills the delay slot, add_call_info is done in
 793   // LIR_Assembler::emit_delay.
 794 }
 795 
 796 
 797 void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
 798   __ ic_call(op->addr(), false);
 799   // The peephole pass fills the delay slot, add_call_info is done in
 800   // LIR_Assembler::emit_delay.
 801 }
 802 
 803 
 804 void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
 805   add_debug_info_for_null_check_here(op->info());
 806   __ load_klass(O0, G3_scratch);
 807   if (Assembler::is_simm13(op->vtable_offset())) {
 808     __ ld_ptr(G3_scratch, op->vtable_offset(), G5_method);
 809   } else {
 810     // This will generate 2 instructions
 811     __ set(op->vtable_offset(), G5_method);
 812     // ld_ptr, set_hi, set
 813     __ ld_ptr(G3_scratch, G5_method, G5_method);
 814   }
 815   __ ld_ptr(G5_method, Method::from_compiled_offset(), G3_scratch);
 816   __ callr(G3_scratch, G0);
 817   // the peephole pass fills the delay slot
 818 }
 819 
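     // Stores from_reg to [base + offset]. Returns the code offset of the memory
     // instruction itself so that callers can attach implicit null-check debug info
     // to the faulting instruction.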
 820 int LIR_Assembler::store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide, bool unaligned) {
 821   int store_offset;
 822   if (!Assembler::is_simm13(offset + (type == T_LONG ? wordSize : 0))) {
 823     assert(!unaligned, "can't handle this");
 824     // for offsets larger than a simm13 we set up the offset in O7
 825     __ set(offset, O7);
 826     store_offset = store(from_reg, base, O7, type, wide);
 827   } else {
 828     if (type == T_ARRAY || type == T_OBJECT) {
 829       __ verify_oop(from_reg->as_register());
 830     }
 831     store_offset = code_offset();
 832     switch (type) {
 833       case T_BOOLEAN: // fall through
 834       case T_BYTE  : __ stb(from_reg->as_register(), base, offset); break;
 835       case T_CHAR  : __ sth(from_reg->as_register(), base, offset); break;
 836       case T_SHORT : __ sth(from_reg->as_register(), base, offset); break;
 837       case T_INT   : __ stw(from_reg->as_register(), base, offset); break;
 838       case T_LONG  :
 839 #ifdef _LP64
 840         if (unaligned || PatchALot) {
 841           __ srax(from_reg->as_register_lo(), 32, O7);
 842           __ stw(from_reg->as_register_lo(), base, offset + lo_word_offset_in_bytes);
 843           __ stw(O7,                         base, offset + hi_word_offset_in_bytes);
 844         } else {
 845           __ stx(from_reg->as_register_lo(), base, offset);
 846         }
 847 #else
 848         assert(Assembler::is_simm13(offset + 4), "must be");
 849         __ stw(from_reg->as_register_lo(), base, offset + lo_word_offset_in_bytes);
 850         __ stw(from_reg->as_register_hi(), base, offset + hi_word_offset_in_bytes);
 851 #endif
 852         break;
 853       case T_ADDRESS:
 854       case T_METADATA:
 855         __ st_ptr(from_reg->as_register(), base, offset);
 856         break;
 857       case T_ARRAY : // fall through
 858       case T_OBJECT:
 859         {
 860           if (UseCompressedOops && !wide) {
 861             __ encode_heap_oop(from_reg->as_register(), G3_scratch);
 862             store_offset = code_offset();
 863             __ stw(G3_scratch, base, offset);
 864           } else {
 865             __ st_ptr(from_reg->as_register(), base, offset);
 866           }
 867           break;
 868         }
 869 
 870       case T_FLOAT : __ stf(FloatRegisterImpl::S, from_reg->as_float_reg(), base, offset); break;
 871       case T_DOUBLE:
 872         {
 873           FloatRegister reg = from_reg->as_double_reg();
 874           // split unaligned stores
 875           if (unaligned || PatchALot) {
 876             assert(Assembler::is_simm13(offset + 4), "must be");
 877             __ stf(FloatRegisterImpl::S, reg->successor(), base, offset + 4);
 878             __ stf(FloatRegisterImpl::S, reg,              base, offset);
 879           } else {
 880             __ stf(FloatRegisterImpl::D, reg, base, offset);
 881           }
 882           break;
 883         }
 884       default      : ShouldNotReachHere();
 885     }
 886   }
 887   return store_offset;
 888 }
 889 
 890 
 891 int LIR_Assembler::store(LIR_Opr from_reg, Register base, Register disp, BasicType type, bool wide) {
 892   if (type == T_ARRAY || type == T_OBJECT) {
 893     __ verify_oop(from_reg->as_register());
 894   }
 895   int store_offset = code_offset();
 896   switch (type) {
 897     case T_BOOLEAN: // fall through
 898     case T_BYTE  : __ stb(from_reg->as_register(), base, disp); break;
 899     case T_CHAR  : __ sth(from_reg->as_register(), base, disp); break;
 900     case T_SHORT : __ sth(from_reg->as_register(), base, disp); break;
 901     case T_INT   : __ stw(from_reg->as_register(), base, disp); break;
 902     case T_LONG  :
 903 #ifdef _LP64
 904       __ stx(from_reg->as_register_lo(), base, disp);
 905 #else
 906       assert(from_reg->as_register_hi()->successor() == from_reg->as_register_lo(), "must match");
 907       __ std(from_reg->as_register_hi(), base, disp);
 908 #endif
 909       break;
 910     case T_ADDRESS:
 911       __ st_ptr(from_reg->as_register(), base, disp);
 912       break;
 913     case T_ARRAY : // fall through
 914     case T_OBJECT:
 915       {
 916         if (UseCompressedOops && !wide) {
 917           __ encode_heap_oop(from_reg->as_register(), G3_scratch);
 918           store_offset = code_offset();
 919           __ stw(G3_scratch, base, disp);
 920         } else {
 921           __ st_ptr(from_reg->as_register(), base, disp);
 922         }
 923         break;
 924       }
 925     case T_FLOAT : __ stf(FloatRegisterImpl::S, from_reg->as_float_reg(), base, disp); break;
 926     case T_DOUBLE: __ stf(FloatRegisterImpl::D, from_reg->as_double_reg(), base, disp); break;
 927     default      : ShouldNotReachHere();
 928   }
 929   return store_offset;
 930 }
 931 
 932 
 933 int LIR_Assembler::load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide, bool unaligned) {
 934   int load_offset;
 935   if (!Assembler::is_simm13(offset + (type == T_LONG ? wordSize : 0))) {
 936     assert(base != O7, "destroying register");
 937     assert(!unaligned, "can't handle this");
 938     // for offsets larger than a simm13 we set up the offset in O7
 939     __ set(offset, O7);
 940     load_offset = load(base, O7, to_reg, type, wide);
 941   } else {
 942     load_offset = code_offset();
 943     switch(type) {
 944       case T_BOOLEAN: // fall through
 945       case T_BYTE  : __ ldsb(base, offset, to_reg->as_register()); break;
 946       case T_CHAR  : __ lduh(base, offset, to_reg->as_register()); break;
 947       case T_SHORT : __ ldsh(base, offset, to_reg->as_register()); break;
 948       case T_INT   : __ ld(base, offset, to_reg->as_register()); break;
 949       case T_LONG  :
 950         if (!unaligned) {
 951 #ifdef _LP64
 952           __ ldx(base, offset, to_reg->as_register_lo());
 953 #else
 954           assert(to_reg->as_register_hi()->successor() == to_reg->as_register_lo(),
 955                  "must be sequential");
 956           __ ldd(base, offset, to_reg->as_register_hi());
 957 #endif
 958         } else {
 959 #ifdef _LP64
 960           assert(base != to_reg->as_register_lo(), "can't handle this");
 961           assert(O7 != to_reg->as_register_lo(), "can't handle this");
 962           __ ld(base, offset + hi_word_offset_in_bytes, to_reg->as_register_lo());
 963           __ lduw(base, offset + lo_word_offset_in_bytes, O7); // in case O7 is base or offset, use it last
 964           __ sllx(to_reg->as_register_lo(), 32, to_reg->as_register_lo());
 965           __ or3(to_reg->as_register_lo(), O7, to_reg->as_register_lo());
 966 #else
 967           if (base == to_reg->as_register_lo()) {
 968             __ ld(base, offset + hi_word_offset_in_bytes, to_reg->as_register_hi());
 969             __ ld(base, offset + lo_word_offset_in_bytes, to_reg->as_register_lo());
 970           } else {
 971             __ ld(base, offset + lo_word_offset_in_bytes, to_reg->as_register_lo());
 972             __ ld(base, offset + hi_word_offset_in_bytes, to_reg->as_register_hi());
 973           }
 974 #endif
 975         }
 976         break;
 977       case T_METADATA:  __ ld_ptr(base, offset, to_reg->as_register()); break;
 978       case T_ADDRESS:
 979 #ifdef _LP64
 980         if (offset == oopDesc::klass_offset_in_bytes() && UseCompressedKlassPointers) {
 981           __ lduw(base, offset, to_reg->as_register());
 982           __ decode_klass_not_null(to_reg->as_register());
 983         } else
 984 #endif
 985         {
 986           __ ld_ptr(base, offset, to_reg->as_register());
 987         }
 988         break;
 989       case T_ARRAY : // fall through
 990       case T_OBJECT:
 991         {
 992           if (UseCompressedOops && !wide) {
 993             __ lduw(base, offset, to_reg->as_register());
 994             __ decode_heap_oop(to_reg->as_register());
 995           } else {
 996             __ ld_ptr(base, offset, to_reg->as_register());
 997           }
 998           break;
 999         }
1000       case T_FLOAT:  __ ldf(FloatRegisterImpl::S, base, offset, to_reg->as_float_reg()); break;
1001       case T_DOUBLE:
1002         {
1003           FloatRegister reg = to_reg->as_double_reg();
1004           // split unaligned loads
1005           if (unaligned || PatchALot) {
1006             __ ldf(FloatRegisterImpl::S, base, offset + 4, reg->successor());
1007             __ ldf(FloatRegisterImpl::S, base, offset,     reg);
1008           } else {
1009             __ ldf(FloatRegisterImpl::D, base, offset, to_reg->as_double_reg());
1010           }
1011           break;
1012         }
1013       default      : ShouldNotReachHere();
1014     }
1015     if (type == T_ARRAY || type == T_OBJECT) {
1016       __ verify_oop(to_reg->as_register());
1017     }
1018   }
1019   return load_offset;
1020 }
1021 
1022 
1023 int LIR_Assembler::load(Register base, Register disp, LIR_Opr to_reg, BasicType type, bool wide) {
1024   int load_offset = code_offset();
1025   switch(type) {
1026     case T_BOOLEAN: // fall through
1027     case T_BYTE  :  __ ldsb(base, disp, to_reg->as_register()); break;
1028     case T_CHAR  :  __ lduh(base, disp, to_reg->as_register()); break;
1029     case T_SHORT :  __ ldsh(base, disp, to_reg->as_register()); break;
1030     case T_INT   :  __ ld(base, disp, to_reg->as_register()); break;
1031     case T_ADDRESS: __ ld_ptr(base, disp, to_reg->as_register()); break;
1032     case T_ARRAY : // fall through
1033     case T_OBJECT:
1034       {
1035           if (UseCompressedOops && !wide) {
1036             __ lduw(base, disp, to_reg->as_register());
1037             __ decode_heap_oop(to_reg->as_register());
1038           } else {
1039             __ ld_ptr(base, disp, to_reg->as_register());
1040           }
1041           break;
1042       }
1043     case T_FLOAT:  __ ldf(FloatRegisterImpl::S, base, disp, to_reg->as_float_reg()); break;
1044     case T_DOUBLE: __ ldf(FloatRegisterImpl::D, base, disp, to_reg->as_double_reg()); break;
1045     case T_LONG  :
1046 #ifdef _LP64
1047       __ ldx(base, disp, to_reg->as_register_lo());
1048 #else
1049       assert(to_reg->as_register_hi()->successor() == to_reg->as_register_lo(),
1050              "must be sequential");
1051       __ ldd(base, disp, to_reg->as_register_hi());
1052 #endif
1053       break;
1054     default      : ShouldNotReachHere();
1055   }
1056   if (type == T_ARRAY || type == T_OBJECT) {
1057     __ verify_oop(to_reg->as_register());
1058   }
1059   return load_offset;
1060 }
1061 
1062 void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
1063   LIR_Const* c = src->as_constant_ptr();
1064   switch (c->type()) {
1065     case T_INT:
1066     case T_FLOAT: {
1067       Register src_reg = O7;
1068       int value = c->as_jint_bits();
1069       if (value == 0) {
1070         src_reg = G0;
1071       } else {
1072         __ set(value, O7);
1073       }
1074       Address addr = frame_map()->address_for_slot(dest->single_stack_ix());
1075       __ stw(src_reg, addr.base(), addr.disp());
1076       break;
1077     }
1078     case T_ADDRESS: {
1079       Register src_reg = O7;
1080       int value = c->as_jint_bits();
1081       if (value == 0) {
1082         src_reg = G0;
1083       } else {
1084         __ set(value, O7);
1085       }
1086       Address addr = frame_map()->address_for_slot(dest->single_stack_ix());
1087       __ st_ptr(src_reg, addr.base(), addr.disp());
1088       break;
1089     }
1090     case T_OBJECT: {
1091       Register src_reg = O7;
1092       jobject2reg(c->as_jobject(), src_reg);
1093       Address addr = frame_map()->address_for_slot(dest->single_stack_ix());
1094       __ st_ptr(src_reg, addr.base(), addr.disp());
1095       break;
1096     }
1097     case T_LONG:
1098     case T_DOUBLE: {
1099       Address addr = frame_map()->address_for_double_slot(dest->double_stack_ix());
1100 
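           // Materialize and store the constant one 32-bit half at a time; zero
           // halves are stored directly from G0 without being materialized.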
1101       Register tmp = O7;
1102       int value_lo = c->as_jint_lo_bits();
1103       if (value_lo == 0) {
1104         tmp = G0;
1105       } else {
1106         __ set(value_lo, O7);
1107       }
1108       __ stw(tmp, addr.base(), addr.disp() + lo_word_offset_in_bytes);
1109       int value_hi = c->as_jint_hi_bits();
1110       if (value_hi == 0) {
1111         tmp = G0;
1112       } else {
1113         __ set(value_hi, O7);
1114       }
1115       __ stw(tmp, addr.base(), addr.disp() + hi_word_offset_in_bytes);
1116       break;
1117     }
1118     default:
1119       Unimplemented();
1120   }
1121 }
1122 
1123 
1124 void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) {
1125   LIR_Const* c = src->as_constant_ptr();
1126   LIR_Address* addr     = dest->as_address_ptr();
1127   Register base = addr->base()->as_pointer_register();
1128   int offset = -1;
1129 
1130   switch (c->type()) {
1131     case T_INT:
1132     case T_FLOAT:
1133     case T_ADDRESS: {
1134       LIR_Opr tmp = FrameMap::O7_opr;
1135       int value = c->as_jint_bits();
1136       if (value == 0) {
1137         tmp = FrameMap::G0_opr;
1138       } else if (Assembler::is_simm13(value)) {
1139         __ set(value, O7);
1140       }
1141       if (addr->index()->is_valid()) {
1142         assert(addr->disp() == 0, "must be zero");
1143         offset = store(tmp, base, addr->index()->as_pointer_register(), type, wide);
1144       } else {
1145         assert(Assembler::is_simm13(addr->disp()), "can't handle larger addresses");
1146         offset = store(tmp, base, addr->disp(), type, wide, false);
1147       }
1148       break;
1149     }
1150     case T_LONG:
1151     case T_DOUBLE: {
1152       assert(!addr->index()->is_valid(), "can't handle reg reg address here");
1153       assert(Assembler::is_simm13(addr->disp()) &&
1154              Assembler::is_simm13(addr->disp() + 4), "can't handle larger addresses");
1155 
1156       LIR_Opr tmp = FrameMap::O7_opr;
1157       int value_lo = c->as_jint_lo_bits();
1158       if (value_lo == 0) {
1159         tmp = FrameMap::G0_opr;
1160       } else {
1161         __ set(value_lo, O7);
1162       }
1163       offset = store(tmp, base, addr->disp() + lo_word_offset_in_bytes, T_INT, wide, false);
1164       int value_hi = c->as_jint_hi_bits();
1165       if (value_hi == 0) {
1166         tmp = FrameMap::G0_opr;
1167       } else {
1168         __ set(value_hi, O7);
1169       }
1170       store(tmp, base, addr->disp() + hi_word_offset_in_bytes, T_INT, wide, false);
1171       break;
1172     }
1173     case T_OBJECT: {
1174       jobject obj = c->as_jobject();
1175       LIR_Opr tmp;
1176       if (obj == NULL) {
1177         tmp = FrameMap::G0_opr;
1178       } else {
1179         tmp = FrameMap::O7_opr;
1180         jobject2reg(c->as_jobject(), O7);
1181       }
1182       // handle either reg+reg or reg+disp address
1183       if (addr->index()->is_valid()) {
1184         assert(addr->disp() == 0, "must be zero");
1185         offset = store(tmp, base, addr->index()->as_pointer_register(), type, wide);
1186       } else {
1187         assert(Assembler::is_simm13(addr->disp()), "can't handle larger addresses");
1188         offset = store(tmp, base, addr->disp(), type, wide, false);
1189       }
1190 
1191       break;
1192     }
1193     default:
1194       Unimplemented();
1195   }
1196   if (info != NULL) {
1197     assert(offset != -1, "offset should've been set");
1198     add_debug_info_for_null_check(offset, info);
1199   }
1200 }
1201 
1202 
1203 void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
1204   LIR_Const* c = src->as_constant_ptr();
1205   LIR_Opr to_reg = dest;
1206 
1207   switch (c->type()) {
1208     case T_INT:
1209     case T_ADDRESS:
1210       {
1211         jint con = c->as_jint();
1212         if (to_reg->is_single_cpu()) {
1213           assert(patch_code == lir_patch_none, "no patching handled here");
1214           __ set(con, to_reg->as_register());
1215         } else {
1216           ShouldNotReachHere();
1217           assert(to_reg->is_single_fpu(), "wrong register kind");
1218 
1219           __ set(con, O7);
1220           Address temp_slot(SP, (frame::register_save_words * wordSize) + STACK_BIAS);
1221           __ st(O7, temp_slot);
1222           __ ldf(FloatRegisterImpl::S, temp_slot, to_reg->as_float_reg());
1223         }
1224       }
1225       break;
1226 
1227     case T_LONG:
1228       {
1229         jlong con = c->as_jlong();
1230 
1231         if (to_reg->is_double_cpu()) {
1232 #ifdef _LP64
1233           __ set(con,  to_reg->as_register_lo());
1234 #else
1235           __ set(low(con),  to_reg->as_register_lo());
1236           __ set(high(con), to_reg->as_register_hi());
1237 #endif
1238 #ifdef _LP64
1239         } else if (to_reg->is_single_cpu()) {
1240           __ set(con, to_reg->as_register());
1241 #endif
1242         } else {
1243           ShouldNotReachHere();
1244           assert(to_reg->is_double_fpu(), "wrong register kind");
1245           Address temp_slot_lo(SP, ((frame::register_save_words  ) * wordSize) + STACK_BIAS);
1246           Address temp_slot_hi(SP, ((frame::register_save_words) * wordSize) + (longSize/2) + STACK_BIAS);
1247           __ set(low(con),  O7);
1248           __ st(O7, temp_slot_lo);
1249           __ set(high(con), O7);
1250           __ st(O7, temp_slot_hi);
1251           __ ldf(FloatRegisterImpl::D, temp_slot_lo, to_reg->as_double_reg());
1252         }
1253       }
1254       break;
1255 
1256     case T_OBJECT:
1257       {
1258         if (patch_code == lir_patch_none) {
1259           jobject2reg(c->as_jobject(), to_reg->as_register());
1260         } else {
1261           jobject2reg_with_patching(to_reg->as_register(), info);
1262         }
1263       }
1264       break;
1265 
1266     case T_METADATA:
1267       {
1268         if (patch_code == lir_patch_none) {
1269           metadata2reg(c->as_metadata(), to_reg->as_register());
1270         } else {
1271           klass2reg_with_patching(to_reg->as_register(), info);
1272         }
1273       }
1274       break;
1275 
1276     case T_FLOAT:
1277       {
1278         address const_addr = __ float_constant(c->as_jfloat());
1279         if (const_addr == NULL) {
1280           bailout("const section overflow");
1281           break;
1282         }
1283         RelocationHolder rspec = internal_word_Relocation::spec(const_addr);
1284         AddressLiteral const_addrlit(const_addr, rspec);
1285         if (to_reg->is_single_fpu()) {
1286           __ patchable_sethi(const_addrlit, O7);
1287           __ relocate(rspec);
1288           __ ldf(FloatRegisterImpl::S, O7, const_addrlit.low10(), to_reg->as_float_reg());
1289 
1290         } else {
1291           assert(to_reg->is_single_cpu(), "Must be a cpu register.");
1292 
1293           __ set(const_addrlit, O7);
1294           __ ld(O7, 0, to_reg->as_register());
1295         }
1296       }
1297       break;
1298 
1299     case T_DOUBLE:
1300       {
1301         address const_addr = __ double_constant(c->as_jdouble());
1302         if (const_addr == NULL) {
1303           bailout("const section overflow");
1304           break;
1305         }
1306         RelocationHolder rspec = internal_word_Relocation::spec(const_addr);
1307 
1308         if (to_reg->is_double_fpu()) {
1309           AddressLiteral const_addrlit(const_addr, rspec);
1310           __ patchable_sethi(const_addrlit, O7);
1311           __ relocate(rspec);
1312           __ ldf (FloatRegisterImpl::D, O7, const_addrlit.low10(), to_reg->as_double_reg());
1313         } else {
1314           assert(to_reg->is_double_cpu(), "Must be a long register.");
1315 #ifdef _LP64
1316           __ set(jlong_cast(c->as_jdouble()), to_reg->as_register_lo());
1317 #else
1318           __ set(low(jlong_cast(c->as_jdouble())), to_reg->as_register_lo());
1319           __ set(high(jlong_cast(c->as_jdouble())), to_reg->as_register_hi());
1320 #endif
1321         }
1322 
1323       }
1324       break;
1325 
1326     default:
1327       ShouldNotReachHere();
1328   }
1329 }
1330 
1331 Address LIR_Assembler::as_Address(LIR_Address* addr) {
1332   Register reg = addr->base()->as_register();
1333   LIR_Opr index = addr->index();
1334   if (index->is_illegal()) {
1335     return Address(reg, addr->disp());
1336   } else {
1337     assert (addr->disp() == 0, "unsupported address mode");
1338     return Address(reg, index->as_pointer_register());
1339   }
1340 }
1341 
1342 
1343 void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
1344   switch (type) {
1345     case T_INT:
1346     case T_FLOAT: {
1347       Register tmp = O7;
1348       Address from = frame_map()->address_for_slot(src->single_stack_ix());
1349       Address to   = frame_map()->address_for_slot(dest->single_stack_ix());
1350       __ lduw(from.base(), from.disp(), tmp);
1351       __ stw(tmp, to.base(), to.disp());
1352       break;
1353     }
1354     case T_OBJECT: {
1355       Register tmp = O7;
1356       Address from = frame_map()->address_for_slot(src->single_stack_ix());
1357       Address to   = frame_map()->address_for_slot(dest->single_stack_ix());
1358       __ ld_ptr(from.base(), from.disp(), tmp);
1359       __ st_ptr(tmp, to.base(), to.disp());
1360       break;
1361     }
1362     case T_LONG:
1363     case T_DOUBLE: {
1364       Register tmp = O7;
1365       Address from = frame_map()->address_for_double_slot(src->double_stack_ix());
1366       Address to   = frame_map()->address_for_double_slot(dest->double_stack_ix());
1367       __ lduw(from.base(), from.disp(), tmp);
1368       __ stw(tmp, to.base(), to.disp());
1369       __ lduw(from.base(), from.disp() + 4, tmp);
1370       __ stw(tmp, to.base(), to.disp() + 4);
1371       break;
1372     }
1373 
1374     default:
1375       ShouldNotReachHere();
1376   }
1377 }
1378 
1379 
1380 Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
1381   Address base = as_Address(addr);
1382   return Address(base.base(), base.disp() + hi_word_offset_in_bytes);
1383 }
1384 
1385 
1386 Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
1387   Address base = as_Address(addr);
1388   return Address(base.base(), base.disp() + lo_word_offset_in_bytes);
1389 }
1390 
1391 
1392 void LIR_Assembler::mem2reg(LIR_Opr src_opr, LIR_Opr dest, BasicType type,
1393                             LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool unaligned) {
1394 
1395   assert(type != T_METADATA, "load of metadata ptr not supported");
1396   LIR_Address* addr = src_opr->as_address_ptr();
1397   LIR_Opr to_reg = dest;
1398 
1399   Register src = addr->base()->as_pointer_register();
1400   Register disp_reg = noreg;
1401   int disp_value = addr->disp();
1402   bool needs_patching = (patch_code != lir_patch_none);
1403 
1404   if (addr->base()->type() == T_OBJECT) {
1405     __ verify_oop(src);
1406   }
1407 
1408   PatchingStub* patch = NULL;
1409   if (needs_patching) {
1410     patch = new PatchingStub(_masm, PatchingStub::access_field_id);
1411     assert(!to_reg->is_double_cpu() ||
1412            patch_code == lir_patch_none ||
1413            patch_code == lir_patch_normal, "patching doesn't match register");
1414   }
1415 
1416   if (addr->index()->is_illegal()) {
1417     if (!Assembler::is_simm13(disp_value) && (!unaligned || Assembler::is_simm13(disp_value + 4))) {
1418       if (needs_patching) {
1419         __ patchable_set(0, O7);
1420       } else {
1421         __ set(disp_value, O7);
1422       }
1423       disp_reg = O7;
1424     }
1425   } else if (unaligned || PatchALot) {
1426     __ add(src, addr->index()->as_register(), O7);
1427     src = O7;
1428   } else {
1429     disp_reg = addr->index()->as_pointer_register();
1430     assert(disp_value == 0, "can't handle 3 operand addresses");
1431   }
1432 
1433   // remember the offset of the load.  The patching_epilog must be done
1434   // before the call to add_debug_info, otherwise the PcDescs don't get
1435   // entered in increasing order.
1436   int offset = code_offset();
1437 
1438   assert(disp_reg != noreg || Assembler::is_simm13(disp_value), "should have set this up");
1439   if (disp_reg == noreg) {
1440     offset = load(src, disp_value, to_reg, type, wide, unaligned);
1441   } else {
1442     assert(!unaligned, "can't handle this");
1443     offset = load(src, disp_reg, to_reg, type, wide);
1444   }
1445 
1446   if (patch != NULL) {
1447     patching_epilog(patch, patch_code, src, info);
1448   }
1449   if (info != NULL) add_debug_info_for_null_check(offset, info);
1450 }
1451 
1452 
1453 void LIR_Assembler::prefetchr(LIR_Opr src) {
1454   LIR_Address* addr = src->as_address_ptr();
1455   Address from_addr = as_Address(addr);
1456 
1457   if (VM_Version::has_v9()) {
1458     __ prefetch(from_addr, Assembler::severalReads);
1459   }
1460 }
1461 
1462 
1463 void LIR_Assembler::prefetchw(LIR_Opr src) {
1464   LIR_Address* addr = src->as_address_ptr();
1465   Address from_addr = as_Address(addr);
1466 
1467   if (VM_Version::has_v9()) {
1468     __ prefetch(from_addr, Assembler::severalWritesAndPossiblyReads);
1469   }
1470 }
1471 
1472 
1473 void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
1474   Address addr;
1475   if (src->is_single_word()) {
1476     addr = frame_map()->address_for_slot(src->single_stack_ix());
1477   } else if (src->is_double_word())  {
1478     addr = frame_map()->address_for_double_slot(src->double_stack_ix());
1479   }
1480 
1481   bool unaligned = (addr.disp() - STACK_BIAS) % 8 != 0;
1482   load(addr.base(), addr.disp(), dest, dest->type(), true /*wide*/, unaligned);
1483 }
1484 
1485 
1486 void LIR_Assembler::reg2stack(LIR_Opr from_reg, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
1487   Address addr;
1488   if (dest->is_single_word()) {
1489     addr = frame_map()->address_for_slot(dest->single_stack_ix());
1490   } else if (dest->is_double_word())  {
1491     addr = frame_map()->address_for_slot(dest->double_stack_ix());
1492   }
1493   bool unaligned = (addr.disp() - STACK_BIAS) % 8 != 0;
1494   store(from_reg, addr.base(), addr.disp(), from_reg->type(), true /*wide*/, unaligned);
1495 }
1496 
1497 
1498 void LIR_Assembler::reg2reg(LIR_Opr from_reg, LIR_Opr to_reg) {
1499   if (from_reg->is_float_kind() && to_reg->is_float_kind()) {
1500     if (from_reg->is_double_fpu()) {
1501       // double to double moves
1502       assert(to_reg->is_double_fpu(), "should match");
1503       __ fmov(FloatRegisterImpl::D, from_reg->as_double_reg(), to_reg->as_double_reg());
1504     } else {
1505       // float to float moves
1506       assert(to_reg->is_single_fpu(), "should match");
1507       __ fmov(FloatRegisterImpl::S, from_reg->as_float_reg(), to_reg->as_float_reg());
1508     }
1509   } else if (!from_reg->is_float_kind() && !to_reg->is_float_kind()) {
1510     if (from_reg->is_double_cpu()) {
1511 #ifdef _LP64
1512       __ mov(from_reg->as_pointer_register(), to_reg->as_pointer_register());
1513 #else
1514       assert(to_reg->is_double_cpu() &&
1515              from_reg->as_register_hi() != to_reg->as_register_lo() &&
1516              from_reg->as_register_lo() != to_reg->as_register_hi(),
1517              "should both be long and not overlap");
1518       // long to long moves
1519       __ mov(from_reg->as_register_hi(), to_reg->as_register_hi());
1520       __ mov(from_reg->as_register_lo(), to_reg->as_register_lo());
1521 #endif
1522 #ifdef _LP64
1523     } else if (to_reg->is_double_cpu()) {
1524       // int to int moves
1525       __ mov(from_reg->as_register(), to_reg->as_register_lo());
1526 #endif
1527     } else {
1528       // int to int moves
1529       __ mov(from_reg->as_register(), to_reg->as_register());
1530     }
1531   } else {
1532     ShouldNotReachHere();
1533   }
1534   if (to_reg->type() == T_OBJECT || to_reg->type() == T_ARRAY) {
1535     __ verify_oop(to_reg->as_register());
1536   }
1537 }
1538 
1539 
1540 void LIR_Assembler::reg2mem(LIR_Opr from_reg, LIR_Opr dest, BasicType type,
1541                             LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack,
1542                             bool wide, bool unaligned) {
1543   assert(type != T_METADATA, "store of metadata ptr not supported");
1544   LIR_Address* addr = dest->as_address_ptr();
1545 
1546   Register src = addr->base()->as_pointer_register();
1547   Register disp_reg = noreg;
1548   int disp_value = addr->disp();
1549   bool needs_patching = (patch_code != lir_patch_none);
1550 
1551   if (addr->base()->is_oop_register()) {
1552     __ verify_oop(src);
1553   }
1554 
1555   PatchingStub* patch = NULL;
1556   if (needs_patching) {
1557     patch = new PatchingStub(_masm, PatchingStub::access_field_id);
1558     assert(!from_reg->is_double_cpu() ||
1559            patch_code == lir_patch_none ||
1560            patch_code == lir_patch_normal, "patching doesn't match register");
1561   }
1562 
1563   if (addr->index()->is_illegal()) {
1564     if (!Assembler::is_simm13(disp_value) && (!unaligned || Assembler::is_simm13(disp_value + 4))) {
1565       if (needs_patching) {
1566         __ patchable_set(0, O7);
1567       } else {
1568         __ set(disp_value, O7);
1569       }
1570       disp_reg = O7;
1571     }
1572   } else if (unaligned || PatchALot) {
1573     __ add(src, addr->index()->as_register(), O7);
1574     src = O7;
1575   } else {
1576     disp_reg = addr->index()->as_pointer_register();
1577     assert(disp_value == 0, "can't handle 3 operand addresses");
1578   }
1579 
1580   // remember the offset of the store.  The patching_epilog must be done
1581   // before the call to add_debug_info_for_null_check, otherwise the PcDescs don't get
1582   // entered in increasing order.
1583   int offset;
1584 
1585   assert(disp_reg != noreg || Assembler::is_simm13(disp_value), "should have set this up");
1586   if (disp_reg == noreg) {
1587     offset = store(from_reg, src, disp_value, type, wide, unaligned);
1588   } else {
1589     assert(!unaligned, "can't handle this");
1590     offset = store(from_reg, src, disp_reg, type, wide);
1591   }
1592 
1593   if (patch != NULL) {
1594     patching_epilog(patch, patch_code, src, info);
1595   }
1596 
1597   if (info != NULL) add_debug_info_for_null_check(offset, info);
1598 }
1599 
1600 
1601 void LIR_Assembler::return_op(LIR_Opr result) {
1602   // the poll may need a register so just pick one that isn't the return register
1603 #if defined(TIERED) && !defined(_LP64)
1604   if (result->type_field() == LIR_OprDesc::long_type) {
1605     // Must move the result to G1
1606     // Must leave proper result in O0,O1 and G1 (TIERED only)
1607     __ sllx(I0, 32, G1);          // Shift bits into high G1
1608     __ srl (I1, 0, I1);           // Zero extend O1 (harmless?)
1609     __ or3 (I1, G1, G1);          // OR 64 bits into G1
1610 #ifdef ASSERT
1611     // mangle it so any problems will show up
1612     __ set(0xdeadbeef, I0);
1613     __ set(0xdeadbeef, I1);
1614 #endif
1615   }
1616 #endif // TIERED
1617   __ set((intptr_t)os::get_polling_page(), L0);
1618   __ relocate(relocInfo::poll_return_type);
1619   __ ld_ptr(L0, 0, G0);
1620   __ ret();
1621   __ delayed()->restore();
1622 }
1623 
1624 
1625 int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
1626   __ set((intptr_t)os::get_polling_page(), tmp->as_register());
1627   if (info != NULL) {
1628     add_debug_info_for_branch(info);
1629   } else {
1630     __ relocate(relocInfo::poll_type);
1631   }
1632 
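       // The load into G0 discards its result; it exists only to fault when the
       // polling page is protected, which is how the VM stops this thread at a
       // safepoint.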
1633   int offset = __ offset();
1634   __ ld_ptr(tmp->as_register(), 0, G0);
1635 
1636   return offset;
1637 }
1638 
1639 
1640 void LIR_Assembler::emit_static_call_stub() {
1641   address call_pc = __ pc();
1642   address stub = __ start_a_stub(call_stub_size);
1643   if (stub == NULL) {
1644     bailout("static call stub overflow");
1645     return;
1646   }
1647 
1648   int start = __ offset();
1649   __ relocate(static_stub_Relocation::spec(call_pc));
1650 
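       // The NULL metadata in G5 and the -1 branch target are placeholders;
       // they are patched with the callee Method* and its entry point when the
       // call site is resolved.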
1651   __ set_metadata(NULL, G5);
1652   // must be set to -1 at code generation time
1653   AddressLiteral addrlit(-1);
1654   __ jump_to(addrlit, G3);
1655   __ delayed()->nop();
1656 
1657   assert(__ offset() - start <= call_stub_size, "stub too big");
1658   __ end_a_stub();
1659 }
1660 
1661 
1662 void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
1663   if (opr1->is_single_fpu()) {
1664     __ fcmp(FloatRegisterImpl::S, Assembler::fcc0, opr1->as_float_reg(), opr2->as_float_reg());
1665   } else if (opr1->is_double_fpu()) {
1666     __ fcmp(FloatRegisterImpl::D, Assembler::fcc0, opr1->as_double_reg(), opr2->as_double_reg());
1667   } else if (opr1->is_single_cpu()) {
1668     if (opr2->is_constant()) {
1669       switch (opr2->as_constant_ptr()->type()) {
1670         case T_INT:
1671           { jint con = opr2->as_constant_ptr()->as_jint();
1672             if (Assembler::is_simm13(con)) {
1673               __ cmp(opr1->as_register(), con);
1674             } else {
1675               __ set(con, O7);
1676               __ cmp(opr1->as_register(), O7);
1677             }
1678           }
1679           break;
1680 
1681         case T_OBJECT:
1682         // there are only equal/notequal comparisons on objects
1683           { jobject con = opr2->as_constant_ptr()->as_jobject();
1684             if (con == NULL) {
1685               __ cmp(opr1->as_register(), 0);
1686             } else {
1687               jobject2reg(con, O7);
1688               __ cmp(opr1->as_register(), O7);
1689             }
1690           }
1691           break;
1692 
1693         default:
1694           ShouldNotReachHere();
1695           break;
1696       }
1697     } else {
1698       if (opr2->is_address()) {
1699         LIR_Address * addr = opr2->as_address_ptr();
1700         BasicType type = addr->type();
1701         if ( type == T_OBJECT ) __ ld_ptr(as_Address(addr), O7);
1702         else                    __ ld(as_Address(addr), O7);
1703         __ cmp(opr1->as_register(), O7);
1704       } else {
1705         __ cmp(opr1->as_register(), opr2->as_register());
1706       }
1707     }
1708   } else if (opr1->is_double_cpu()) {
1709     Register xlo = opr1->as_register_lo();
1710     Register xhi = opr1->as_register_hi();
1711     if (opr2->is_constant() && opr2->as_jlong() == 0) {
1712       assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "only handles these cases");
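           // OR the value (or its two 32-bit halves) into G0 purely to set the
           // condition codes for the equal/not-equal test against zero.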
1713 #ifdef _LP64
1714       __ orcc(xhi, G0, G0);
1715 #else
1716       __ orcc(xhi, xlo, G0);
1717 #endif
1718     } else if (opr2->is_register()) {
1719       Register ylo = opr2->as_register_lo();
1720       Register yhi = opr2->as_register_hi();
1721 #ifdef _LP64
1722       __ cmp(xlo, ylo);
1723 #else
1724       __ subcc(xlo, ylo, xlo);
1725       __ subccc(xhi, yhi, xhi);
1726       if (condition == lir_cond_equal || condition == lir_cond_notEqual) {
1727         __ orcc(xhi, xlo, G0);
1728       }
1729 #endif
1730     } else {
1731       ShouldNotReachHere();
1732     }
1733   } else if (opr1->is_address()) {
1734     LIR_Address * addr = opr1->as_address_ptr();
1735     BasicType type = addr->type();
1736     assert (opr2->is_constant(), "Checking");
1737     if ( type == T_OBJECT ) __ ld_ptr(as_Address(addr), O7);
1738     else                    __ ld(as_Address(addr), O7);
1739     __ cmp(O7, opr2->as_constant_ptr()->as_jint());
1740   } else {
1741     ShouldNotReachHere();
1742   }
1743 }
1744 
1745 
1746 void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){
1747   if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
1748     bool is_unordered_less = (code == lir_ucmp_fd2i);
1749     if (left->is_single_fpu()) {
1750       __ float_cmp(true, is_unordered_less ? -1 : 1, left->as_float_reg(), right->as_float_reg(), dst->as_register());
1751     } else if (left->is_double_fpu()) {
1752       __ float_cmp(false, is_unordered_less ? -1 : 1, left->as_double_reg(), right->as_double_reg(), dst->as_register());
1753     } else {
1754       ShouldNotReachHere();
1755     }
1756   } else if (code == lir_cmp_l2i) {
1757 #ifdef _LP64
1758     __ lcmp(left->as_register_lo(), right->as_register_lo(), dst->as_register());
1759 #else
1760     __ lcmp(left->as_register_hi(),  left->as_register_lo(),
1761             right->as_register_hi(), right->as_register_lo(),
1762             dst->as_register());
1763 #endif
1764   } else {
1765     ShouldNotReachHere();
1766   }
1767 }
1768 
1769 
1770 void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
1771   Assembler::Condition acond;
1772   switch (condition) {
1773     case lir_cond_equal:        acond = Assembler::equal;        break;
1774     case lir_cond_notEqual:     acond = Assembler::notEqual;     break;
1775     case lir_cond_less:         acond = Assembler::less;         break;
1776     case lir_cond_lessEqual:    acond = Assembler::lessEqual;    break;
1777     case lir_cond_greaterEqual: acond = Assembler::greaterEqual; break;
1778     case lir_cond_greater:      acond = Assembler::greater;      break;
1779     case lir_cond_aboveEqual:   acond = Assembler::greaterEqualUnsigned;      break;
1780     case lir_cond_belowEqual:   acond = Assembler::lessEqualUnsigned;      break;
1781     default:                         ShouldNotReachHere();
1782   };
1783 
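       // There is no conditional move here: opr1 is materialized into the
       // result unconditionally, then a branch on the condition skips the opr2
       // load.  For a large int constant the sethi is emitted before the branch
       // and the low bits are filled in from the branch delay slot.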
1784   if (opr1->is_constant() && opr1->type() == T_INT) {
1785     Register dest = result->as_register();
1786     // load up first part of constant before branch
1787     // and do the rest in the delay slot.
1788     if (!Assembler::is_simm13(opr1->as_jint())) {
1789       __ sethi(opr1->as_jint(), dest);
1790     }
1791   } else if (opr1->is_constant()) {
1792     const2reg(opr1, result, lir_patch_none, NULL);
1793   } else if (opr1->is_register()) {
1794     reg2reg(opr1, result);
1795   } else if (opr1->is_stack()) {
1796     stack2reg(opr1, result, result->type());
1797   } else {
1798     ShouldNotReachHere();
1799   }
1800   Label skip;
1801 #ifdef _LP64
1802     if  (type == T_INT) {
1803       __ br(acond, false, Assembler::pt, skip);
1804     } else
1805 #endif
1806       __ brx(acond, false, Assembler::pt, skip); // checks icc on 32bit and xcc on 64bit
1807   if (opr1->is_constant() && opr1->type() == T_INT) {
1808     Register dest = result->as_register();
1809     if (Assembler::is_simm13(opr1->as_jint())) {
1810       __ delayed()->or3(G0, opr1->as_jint(), dest);
1811     } else {
1812       // the sethi has been done above, so just put in the low 10 bits
1813       __ delayed()->or3(dest, opr1->as_jint() & 0x3ff, dest);
1814     }
1815   } else {
1816     // can't do anything useful in the delay slot
1817     __ delayed()->nop();
1818   }
1819   if (opr2->is_constant()) {
1820     const2reg(opr2, result, lir_patch_none, NULL);
1821   } else if (opr2->is_register()) {
1822     reg2reg(opr2, result);
1823   } else if (opr2->is_stack()) {
1824     stack2reg(opr2, result, result->type());
1825   } else {
1826     ShouldNotReachHere();
1827   }
1828   __ bind(skip);
1829 }
1830 
1831 
1832 void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) {
1833   assert(info == NULL, "unused on this code path");
1834   assert(left->is_register(), "wrong items state");
1835   assert(dest->is_register(), "wrong items state");
1836 
1837   if (right->is_register()) {
1838     if (dest->is_float_kind()) {
1839 
1840       FloatRegister lreg, rreg, res;
1841       FloatRegisterImpl::Width w;
1842       if (right->is_single_fpu()) {
1843         w = FloatRegisterImpl::S;
1844         lreg = left->as_float_reg();
1845         rreg = right->as_float_reg();
1846         res  = dest->as_float_reg();
1847       } else {
1848         w = FloatRegisterImpl::D;
1849         lreg = left->as_double_reg();
1850         rreg = right->as_double_reg();
1851         res  = dest->as_double_reg();
1852       }
1853 
1854       switch (code) {
1855         case lir_add: __ fadd(w, lreg, rreg, res); break;
1856         case lir_sub: __ fsub(w, lreg, rreg, res); break;
1857         case lir_mul: // fall through
1858         case lir_mul_strictfp: __ fmul(w, lreg, rreg, res); break;
1859         case lir_div: // fall through
1860         case lir_div_strictfp: __ fdiv(w, lreg, rreg, res); break;
1861         default: ShouldNotReachHere();
1862       }
1863 
1864     } else if (dest->is_double_cpu()) {
1865 #ifdef _LP64
1866       Register dst_lo = dest->as_register_lo();
1867       Register op1_lo = left->as_pointer_register();
1868       Register op2_lo = right->as_pointer_register();
1869 
1870       switch (code) {
1871         case lir_add:
1872           __ add(op1_lo, op2_lo, dst_lo);
1873           break;
1874 
1875         case lir_sub:
1876           __ sub(op1_lo, op2_lo, dst_lo);
1877           break;
1878 
1879         default: ShouldNotReachHere();
1880       }
1881 #else
1882       Register op1_lo = left->as_register_lo();
1883       Register op1_hi = left->as_register_hi();
1884       Register op2_lo = right->as_register_lo();
1885       Register op2_hi = right->as_register_hi();
1886       Register dst_lo = dest->as_register_lo();
1887       Register dst_hi = dest->as_register_hi();
1888 
1889       switch (code) {
1890         case lir_add:
1891           __ addcc(op1_lo, op2_lo, dst_lo);
1892           __ addc (op1_hi, op2_hi, dst_hi);
1893           break;
1894 
1895         case lir_sub:
1896           __ subcc(op1_lo, op2_lo, dst_lo);
1897           __ subc (op1_hi, op2_hi, dst_hi);
1898           break;
1899 
1900         default: ShouldNotReachHere();
1901       }
1902 #endif
1903     } else {
1904       assert (right->is_single_cpu(), "Just Checking");
1905 
1906       Register lreg = left->as_register();
1907       Register res  = dest->as_register();
1908       Register rreg = right->as_register();
1909       switch (code) {
1910         case lir_add:  __ add  (lreg, rreg, res); break;
1911         case lir_sub:  __ sub  (lreg, rreg, res); break;
1912         case lir_mul:  __ mulx (lreg, rreg, res); break;
1913         default: ShouldNotReachHere();
1914       }
1915     }
1916   } else {
1917     assert (right->is_constant(), "must be constant");
1918 
1919     if (dest->is_single_cpu()) {
1920       Register lreg = left->as_register();
1921       Register res  = dest->as_register();
1922       int    simm13 = right->as_constant_ptr()->as_jint();
1923 
1924       switch (code) {
1925         case lir_add:  __ add  (lreg, simm13, res); break;
1926         case lir_sub:  __ sub  (lreg, simm13, res); break;
1927         case lir_mul:  __ mulx (lreg, simm13, res); break;
1928         default: ShouldNotReachHere();
1929       }
1930     } else {
1931       Register lreg = left->as_pointer_register();
1932       Register res  = dest->as_register_lo();
1933       long con = right->as_constant_ptr()->as_jlong();
1934       assert(Assembler::is_simm13(con), "must be simm13");
1935 
1936       switch (code) {
1937         case lir_add:  __ add  (lreg, (int)con, res); break;
1938         case lir_sub:  __ sub  (lreg, (int)con, res); break;
1939         case lir_mul:  __ mulx (lreg, (int)con, res); break;
1940         default: ShouldNotReachHere();
1941       }
1942     }
1943   }
1944 }
1945 
1946 
1947 void LIR_Assembler::fpop() {
1948   // do nothing
1949 }
1950 
1951 
1952 void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr thread, LIR_Opr dest, LIR_Op* op) {
1953   switch (code) {
1954     case lir_sin:
1955     case lir_tan:
1956     case lir_cos: {
1957       assert(thread->is_valid(), "preserve the thread object for performance reasons");
1958       assert(dest->as_double_reg() == F0, "the result will be in f0/f1");
1959       break;
1960     }
1961     case lir_sqrt: {
1962       assert(!thread->is_valid(), "there is no need for a thread_reg for dsqrt");
1963       FloatRegister src_reg = value->as_double_reg();
1964       FloatRegister dst_reg = dest->as_double_reg();
1965       __ fsqrt(FloatRegisterImpl::D, src_reg, dst_reg);
1966       break;
1967     }
1968     case lir_abs: {
1969       assert(!thread->is_valid(), "there is no need for a thread_reg for fabs");
1970       FloatRegister src_reg = value->as_double_reg();
1971       FloatRegister dst_reg = dest->as_double_reg();
1972       __ fabs(FloatRegisterImpl::D, src_reg, dst_reg);
1973       break;
1974     }
1975     default: {
1976       ShouldNotReachHere();
1977       break;
1978     }
1979   }
1980 }
1981 
1982 
1983 void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
1984   if (right->is_constant()) {
1985     if (dest->is_single_cpu()) {
1986       int simm13 = right->as_constant_ptr()->as_jint();
1987       switch (code) {
1988         case lir_logic_and:   __ and3 (left->as_register(), simm13, dest->as_register()); break;
1989         case lir_logic_or:    __ or3  (left->as_register(), simm13, dest->as_register()); break;
1990         case lir_logic_xor:   __ xor3 (left->as_register(), simm13, dest->as_register()); break;
1991         default: ShouldNotReachHere();
1992       }
1993     } else {
1994       long c = right->as_constant_ptr()->as_jlong();
1995       assert(c == (int)c && Assembler::is_simm13(c), "out of range");
1996       int simm13 = (int)c;
1997       switch (code) {
1998         case lir_logic_and:
1999 #ifndef _LP64
2000           __ and3 (left->as_register_hi(), 0,      dest->as_register_hi());
2001 #endif
2002           __ and3 (left->as_register_lo(), simm13, dest->as_register_lo());
2003           break;
2004 
2005         case lir_logic_or:
2006 #ifndef _LP64
2007           __ or3 (left->as_register_hi(), 0,      dest->as_register_hi());
2008 #endif
2009           __ or3 (left->as_register_lo(), simm13, dest->as_register_lo());
2010           break;
2011 
2012         case lir_logic_xor:
2013 #ifndef _LP64
2014           __ xor3 (left->as_register_hi(), 0,      dest->as_register_hi());
2015 #endif
2016           __ xor3 (left->as_register_lo(), simm13, dest->as_register_lo());
2017           break;
2018 
2019         default: ShouldNotReachHere();
2020       }
2021     }
2022   } else {
2023     assert(right->is_register(), "right should be in register");
2024 
2025     if (dest->is_single_cpu()) {
2026       switch (code) {
2027         case lir_logic_and:   __ and3 (left->as_register(), right->as_register(), dest->as_register()); break;
2028         case lir_logic_or:    __ or3  (left->as_register(), right->as_register(), dest->as_register()); break;
2029         case lir_logic_xor:   __ xor3 (left->as_register(), right->as_register(), dest->as_register()); break;
2030         default: ShouldNotReachHere();
2031       }
2032     } else {
2033 #ifdef _LP64
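           // A single-cpu oop operand occupies a full 64-bit register; other
           // long operands are accessed through the low register of their pair.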
2034       Register l = (left->is_single_cpu() && left->is_oop_register()) ? left->as_register() :
2035                                                                         left->as_register_lo();
2036       Register r = (right->is_single_cpu() && right->is_oop_register()) ? right->as_register() :
2037                                                                           right->as_register_lo();
2038 
2039       switch (code) {
2040         case lir_logic_and: __ and3 (l, r, dest->as_register_lo()); break;
2041         case lir_logic_or:  __ or3  (l, r, dest->as_register_lo()); break;
2042         case lir_logic_xor: __ xor3 (l, r, dest->as_register_lo()); break;
2043         default: ShouldNotReachHere();
2044       }
2045 #else
2046       switch (code) {
2047         case lir_logic_and:
2048           __ and3 (left->as_register_hi(), right->as_register_hi(), dest->as_register_hi());
2049           __ and3 (left->as_register_lo(), right->as_register_lo(), dest->as_register_lo());
2050           break;
2051 
2052         case lir_logic_or:
2053           __ or3 (left->as_register_hi(), right->as_register_hi(), dest->as_register_hi());
2054           __ or3 (left->as_register_lo(), right->as_register_lo(), dest->as_register_lo());
2055           break;
2056 
2057         case lir_logic_xor:
2058           __ xor3 (left->as_register_hi(), right->as_register_hi(), dest->as_register_hi());
2059           __ xor3 (left->as_register_lo(), right->as_register_lo(), dest->as_register_lo());
2060           break;
2061 
2062         default: ShouldNotReachHere();
2063       }
2064 #endif
2065     }
2066   }
2067 }
2068 
2069 
2070 int LIR_Assembler::shift_amount(BasicType t) {
2071   int elem_size = type2aelembytes(t);
2072   switch (elem_size) {
2073     case 1 : return 0;
2074     case 2 : return 1;
2075     case 4 : return 2;
2076     case 8 : return 3;
2077   }
2078   ShouldNotReachHere();
2079   return -1;
2080 }
2081 
2082 
2083 void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
2084   assert(exceptionOop->as_register() == Oexception, "should match");
2085   assert(exceptionPC->as_register() == Oissuing_pc, "should match");
2086 
2087   info->add_register_oop(exceptionOop);
2088 
2089   // reuse the debug info from the safepoint poll for the throw op itself
2090   address pc_for_athrow  = __ pc();
2091   int pc_for_athrow_offset = __ offset();
2092   RelocationHolder rspec = internal_word_Relocation::spec(pc_for_athrow);
2093   __ set(pc_for_athrow, Oissuing_pc, rspec);
2094   add_call_info(pc_for_athrow_offset, info); // for exception handler
2095 
2096   __ call(Runtime1::entry_for(Runtime1::handle_exception_id), relocInfo::runtime_call_type);
2097   __ delayed()->nop();
2098 }
2099 
2100 
2101 void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
2102   assert(exceptionOop->as_register() == Oexception, "should match");
2103 
2104   __ br(Assembler::always, false, Assembler::pt, _unwind_handler_entry);
2105   __ delayed()->nop();
2106 }
2107 
2108 void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
2109   Register src = op->src()->as_register();
2110   Register dst = op->dst()->as_register();
2111   Register src_pos = op->src_pos()->as_register();
2112   Register dst_pos = op->dst_pos()->as_register();
2113   Register length  = op->length()->as_register();
2114   Register tmp = op->tmp()->as_register();
2115   Register tmp2 = O7;
2116 
2117   int flags = op->flags();
2118   ciArrayKlass* default_type = op->expected_type();
2119   BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
2120   if (basic_type == T_ARRAY) basic_type = T_OBJECT;
2121 
2122 #ifdef _LP64
2123   // the upper 32 bits must be zero
2124   __ sra(dst_pos, 0, dst_pos);
2125   __ sra(src_pos, 0, src_pos);
2126   __ sra(length, 0, length);
2127 #endif
2128 
2129   // set up the arraycopy stub information
2130   ArrayCopyStub* stub = op->stub();
2131 
2132   // Always use the stub if no type information is available.  It's OK if
2133   // the known type isn't loaded, since the code sanity-checks in debug
2134   // mode, and the type isn't required when we know the exact type.
2135   // Also check that the type is an array type.
2136   if (op->expected_type() == NULL) {
2137     __ mov(src,     O0);
2138     __ mov(src_pos, O1);
2139     __ mov(dst,     O2);
2140     __ mov(dst_pos, O3);
2141     __ mov(length,  O4);
2142     address copyfunc_addr = StubRoutines::generic_arraycopy();
2143 
2144     if (copyfunc_addr == NULL) { // Use C version if stub was not generated
2145       __ call_VM_leaf(tmp, CAST_FROM_FN_PTR(address, Runtime1::arraycopy));
2146     } else {
2147 #ifndef PRODUCT
2148       if (PrintC1Statistics) {
2149         address counter = (address)&Runtime1::_generic_arraycopystub_cnt;
2150         __ inc_counter(counter, G1, G3);
2151       }
2152 #endif
2153       __ call_VM_leaf(tmp, copyfunc_addr);
2154     }
2155 
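         // The stub returns 0 on success or ~(number of elements copied) on
         // failure; recover that count, advance the positions, shrink the
         // length, and let the slow stub handle whatever is left.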
2156     if (copyfunc_addr != NULL) {
2157       __ xor3(O0, -1, tmp);
2158       __ sub(length, tmp, length);
2159       __ add(src_pos, tmp, src_pos);
2160       __ cmp_zero_and_br(Assembler::less, O0, *stub->entry());
2161       __ delayed()->add(dst_pos, tmp, dst_pos);
2162     } else {
2163       __ cmp_zero_and_br(Assembler::less, O0, *stub->entry());
2164       __ delayed()->nop();
2165     }
2166     __ bind(*stub->continuation());
2167     return;
2168   }
2169 
2170   assert(default_type != NULL && default_type->is_array_klass(), "must be true at this point");
2171 
2172   // make sure src and dst are non-null and load array length
2173   if (flags & LIR_OpArrayCopy::src_null_check) {
2174     __ tst(src);
2175     __ brx(Assembler::equal, false, Assembler::pn, *stub->entry());
2176     __ delayed()->nop();
2177   }
2178 
2179   if (flags & LIR_OpArrayCopy::dst_null_check) {
2180     __ tst(dst);
2181     __ brx(Assembler::equal, false, Assembler::pn, *stub->entry());
2182     __ delayed()->nop();
2183   }
2184 
2185   if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
2186     // test src_pos register
2187     __ cmp_zero_and_br(Assembler::less, src_pos, *stub->entry());
2188     __ delayed()->nop();
2189   }
2190 
2191   if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
2192     // test dst_pos register
2193     __ cmp_zero_and_br(Assembler::less, dst_pos, *stub->entry());
2194     __ delayed()->nop();
2195   }
2196 
2197   if (flags & LIR_OpArrayCopy::length_positive_check) {
2198     // make sure length isn't negative
2199     __ cmp_zero_and_br(Assembler::less, length, *stub->entry());
2200     __ delayed()->nop();
2201   }
2202 
2203   if (flags & LIR_OpArrayCopy::src_range_check) {
2204     __ ld(src, arrayOopDesc::length_offset_in_bytes(), tmp2);
2205     __ add(length, src_pos, tmp);
2206     __ cmp(tmp2, tmp);
2207     __ br(Assembler::carrySet, false, Assembler::pn, *stub->entry());
2208     __ delayed()->nop();
2209   }
2210 
2211   if (flags & LIR_OpArrayCopy::dst_range_check) {
2212     __ ld(dst, arrayOopDesc::length_offset_in_bytes(), tmp2);
2213     __ add(length, dst_pos, tmp);
2214     __ cmp(tmp2, tmp);
2215     __ br(Assembler::carrySet, false, Assembler::pn, *stub->entry());
2216     __ delayed()->nop();
2217   }
2218 
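       // log2 of the element size, used to scale element indexes into byte offsets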
2219   int shift = shift_amount(basic_type);
2220 
2221   if (flags & LIR_OpArrayCopy::type_check) {
2222     // We don't know whether the array types are compatible
2223     if (basic_type != T_OBJECT) {
2224       // Simple test for basic type arrays
2225       if (UseCompressedKlassPointers) {
2226         // We don't need to decode because we just need to compare
2227         __ lduw(src, oopDesc::klass_offset_in_bytes(), tmp);
2228         __ lduw(dst, oopDesc::klass_offset_in_bytes(), tmp2);
2229         __ cmp(tmp, tmp2);
2230         __ br(Assembler::notEqual, false, Assembler::pt, *stub->entry());
2231       } else {
2232         __ ld_ptr(src, oopDesc::klass_offset_in_bytes(), tmp);
2233         __ ld_ptr(dst, oopDesc::klass_offset_in_bytes(), tmp2);
2234         __ cmp(tmp, tmp2);
2235         __ brx(Assembler::notEqual, false, Assembler::pt, *stub->entry());
2236       }
2237       __ delayed()->nop();
2238     } else {
2239       // For object arrays, if src is a sub class of dst then we can
2240       // safely do the copy.
2241       address copyfunc_addr = StubRoutines::checkcast_arraycopy();
2242 
2243       Label cont, slow;
2244       assert_different_registers(tmp, tmp2, G3, G1);
2245 
2246       __ load_klass(src, G3);
2247       __ load_klass(dst, G1);
2248 
2249       __ check_klass_subtype_fast_path(G3, G1, tmp, tmp2, &cont, copyfunc_addr == NULL ? stub->entry() : &slow, NULL);
2250 
2251       __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
2252       __ delayed()->nop();
2253 
2254       __ cmp(G3, 0);
2255       if (copyfunc_addr != NULL) { // use stub if available
2256         // src is not a sub class of dst so we have to do a
2257         // per-element check.
2258         __ br(Assembler::notEqual, false, Assembler::pt, cont);
2259         __ delayed()->nop();
2260 
2261         __ bind(slow);
2262 
2263         int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
2264         if ((flags & mask) != mask) {
2265           // At least one of them is known to be an object array; check that the other one is too.
2266           assert(flags & mask, "one of the two should be known to be an object array");
2267 
2268           if (!(flags & LIR_OpArrayCopy::src_objarray)) {
2269             __ load_klass(src, tmp);
2270           } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
2271             __ load_klass(dst, tmp);
2272           }
2273           int lh_offset = in_bytes(Klass::layout_helper_offset());
2274 
2275           __ lduw(tmp, lh_offset, tmp2);
2276 
2277           jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
2278           __ set(objArray_lh, tmp);
2279           __ cmp(tmp, tmp2);
2280           __ br(Assembler::notEqual, false, Assembler::pt,  *stub->entry());
2281           __ delayed()->nop();
2282         }
2283 
2284         Register src_ptr = O0;
2285         Register dst_ptr = O1;
2286         Register len     = O2;
2287         Register chk_off = O3;
2288         Register super_k = O4;
2289 
2290         __ add(src, arrayOopDesc::base_offset_in_bytes(basic_type), src_ptr);
2291         if (shift == 0) {
2292           __ add(src_ptr, src_pos, src_ptr);
2293         } else {
2294           __ sll(src_pos, shift, tmp);
2295           __ add(src_ptr, tmp, src_ptr);
2296         }
2297 
2298         __ add(dst, arrayOopDesc::base_offset_in_bytes(basic_type), dst_ptr);
2299         if (shift == 0) {
2300           __ add(dst_ptr, dst_pos, dst_ptr);
2301         } else {
2302           __ sll(dst_pos, shift, tmp);
2303           __ add(dst_ptr, tmp, dst_ptr);
2304         }
2305         __ mov(length, len);
2306         __ load_klass(dst, tmp);
2307 
2308         int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
2309         __ ld_ptr(tmp, ek_offset, super_k);
2310 
2311         int sco_offset = in_bytes(Klass::super_check_offset_offset());
2312         __ lduw(super_k, sco_offset, chk_off);
2313 
2314         __ call_VM_leaf(tmp, copyfunc_addr);
2315 
2316 #ifndef PRODUCT
2317         if (PrintC1Statistics) {
2318           Label failed;
2319           __ br_notnull_short(O0, Assembler::pn, failed);
2320           __ inc_counter((address)&Runtime1::_arraycopy_checkcast_cnt, G1, G3);
2321           __ bind(failed);
2322         }
2323 #endif
2324 
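             // O0 == 0 means the checkcast copy handled everything; otherwise it
             // holds ~(elements copied before a failing element store check), so
             // adjust the arguments and send the remainder to the slow stub.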
2325         __ br_null(O0, false, Assembler::pt,  *stub->continuation());
2326         __ delayed()->xor3(O0, -1, tmp);
2327 
2328 #ifndef PRODUCT
2329         if (PrintC1Statistics) {
2330           __ inc_counter((address)&Runtime1::_arraycopy_checkcast_attempt_cnt, G1, G3);
2331         }
2332 #endif
2333 
2334         __ sub(length, tmp, length);
2335         __ add(src_pos, tmp, src_pos);
2336         __ br(Assembler::always, false, Assembler::pt, *stub->entry());
2337         __ delayed()->add(dst_pos, tmp, dst_pos);
2338 
2339         __ bind(cont);
2340       } else {
2341         __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
2342         __ delayed()->nop();
2343         __ bind(cont);
2344       }
2345     }
2346   }
2347 
2348 #ifdef ASSERT
2349   if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
2350     // Sanity check the known type with the incoming class.  For the
2351     // primitive case the types must match exactly with src.klass and
2352     // dst.klass each exactly matching the default type.  For the
2353     // object array case, if no type check is needed then either the
2354     // dst type is exactly the expected type and the src type is a
2355     // subtype which we can't check or src is the same array as dst
2356     // but not necessarily exactly of type default_type.
2357     Label known_ok, halt;
2358     metadata2reg(op->expected_type()->constant_encoding(), tmp);
2359     if (UseCompressedKlassPointers) {
2360       // tmp holds the default type. It currently comes uncompressed after the
2361       // load of a constant, so encode it.
2362       __ encode_klass_not_null(tmp);
2363       // load the raw value of the dst klass, since we will be comparing
2364       // compressed values directly.
2365       __ lduw(dst, oopDesc::klass_offset_in_bytes(), tmp2);
2366       if (basic_type != T_OBJECT) {
2367         __ cmp(tmp, tmp2);
2368         __ br(Assembler::notEqual, false, Assembler::pn, halt);
2369         // load the raw value of the src klass.
2370         __ delayed()->lduw(src, oopDesc::klass_offset_in_bytes(), tmp2);
2371         __ cmp_and_br_short(tmp, tmp2, Assembler::equal, Assembler::pn, known_ok);
2372       } else {
2373         __ cmp(tmp, tmp2);
2374         __ br(Assembler::equal, false, Assembler::pn, known_ok);
2375         __ delayed()->cmp(src, dst);
2376         __ brx(Assembler::equal, false, Assembler::pn, known_ok);
2377         __ delayed()->nop();
2378       }
2379     } else {
2380       __ ld_ptr(dst, oopDesc::klass_offset_in_bytes(), tmp2);
2381       if (basic_type != T_OBJECT) {
2382         __ cmp(tmp, tmp2);
2383         __ brx(Assembler::notEqual, false, Assembler::pn, halt);
2384         __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), tmp2);
2385         __ cmp_and_brx_short(tmp, tmp2, Assembler::equal, Assembler::pn, known_ok);
2386       } else {
2387         __ cmp(tmp, tmp2);
2388         __ brx(Assembler::equal, false, Assembler::pn, known_ok);
2389         __ delayed()->cmp(src, dst);
2390         __ brx(Assembler::equal, false, Assembler::pn, known_ok);
2391         __ delayed()->nop();
2392       }
2393     }
2394     __ bind(halt);
2395     __ stop("incorrect type information in arraycopy");
2396     __ bind(known_ok);
2397   }
2398 #endif
2399 
2400 #ifndef PRODUCT
2401   if (PrintC1Statistics) {
2402     address counter = Runtime1::arraycopy_count_address(basic_type);
2403     __ inc_counter(counter, G1, G3);
2404   }
2405 #endif
2406 
2407   Register src_ptr = O0;
2408   Register dst_ptr = O1;
2409   Register len     = O2;
2410 
2411   __ add(src, arrayOopDesc::base_offset_in_bytes(basic_type), src_ptr);
2412   if (shift == 0) {
2413     __ add(src_ptr, src_pos, src_ptr);
2414   } else {
2415     __ sll(src_pos, shift, tmp);
2416     __ add(src_ptr, tmp, src_ptr);
2417   }
2418 
2419   __ add(dst, arrayOopDesc::base_offset_in_bytes(basic_type), dst_ptr);
2420   if (shift == 0) {
2421     __ add(dst_ptr, dst_pos, dst_ptr);
2422   } else {
2423     __ sll(dst_pos, shift, tmp);
2424     __ add(dst_ptr, tmp, dst_ptr);
2425   }
2426 
2427   bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
2428   bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
2429   const char *name;
2430   address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
2431 
2432   // the arraycopy stubs take a length in number of elements, so don't scale it.
2433   __ mov(length, len);
2434   __ call_VM_leaf(tmp, entry);
2435 
2436   __ bind(*stub->continuation());
2437 }
2438 
2439 
2440 void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
2441   if (dest->is_single_cpu()) {
2442 #ifdef _LP64
2443     if (left->type() == T_OBJECT) {
2444       switch (code) {
2445         case lir_shl:  __ sllx  (left->as_register(), count->as_register(), dest->as_register()); break;
2446         case lir_shr:  __ srax  (left->as_register(), count->as_register(), dest->as_register()); break;
2447         case lir_ushr: __ srl   (left->as_register(), count->as_register(), dest->as_register()); break;
2448         default: ShouldNotReachHere();
2449       }
2450     } else
2451 #endif
2452       switch (code) {
2453         case lir_shl:  __ sll   (left->as_register(), count->as_register(), dest->as_register()); break;
2454         case lir_shr:  __ sra   (left->as_register(), count->as_register(), dest->as_register()); break;
2455         case lir_ushr: __ srl   (left->as_register(), count->as_register(), dest->as_register()); break;
2456         default: ShouldNotReachHere();
2457       }
2458   } else {
2459 #ifdef _LP64
2460     switch (code) {
2461       case lir_shl:  __ sllx  (left->as_register_lo(), count->as_register(), dest->as_register_lo()); break;
2462       case lir_shr:  __ srax  (left->as_register_lo(), count->as_register(), dest->as_register_lo()); break;
2463       case lir_ushr: __ srlx  (left->as_register_lo(), count->as_register(), dest->as_register_lo()); break;
2464       default: ShouldNotReachHere();
2465     }
2466 #else
2467     switch (code) {
2468       case lir_shl:  __ lshl  (left->as_register_hi(), left->as_register_lo(), count->as_register(), dest->as_register_hi(), dest->as_register_lo(), G3_scratch); break;
2469       case lir_shr:  __ lshr  (left->as_register_hi(), left->as_register_lo(), count->as_register(), dest->as_register_hi(), dest->as_register_lo(), G3_scratch); break;
2470       case lir_ushr: __ lushr (left->as_register_hi(), left->as_register_lo(), count->as_register(), dest->as_register_hi(), dest->as_register_lo(), G3_scratch); break;
2471       default: ShouldNotReachHere();
2472     }
2473 #endif
2474   }
2475 }
2476 
2477 
2478 void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
2479 #ifdef _LP64
2480   if (left->type() == T_OBJECT) {
2481     count = count & 63;  // shouldn't shift by more than sizeof(intptr_t)
2482     Register l = left->as_register();
2483     Register d = dest->as_register_lo();
2484     switch (code) {
2485       case lir_shl:  __ sllx  (l, count, d); break;
2486       case lir_shr:  __ srax  (l, count, d); break;
2487       case lir_ushr: __ srlx  (l, count, d); break;
2488       default: ShouldNotReachHere();
2489     }
2490     return;
2491   }
2492 #endif
2493 
2494   if (dest->is_single_cpu()) {
2495     count = count & 0x1F; // Java spec
2496     switch (code) {
2497       case lir_shl:  __ sll   (left->as_register(), count, dest->as_register()); break;
2498       case lir_shr:  __ sra   (left->as_register(), count, dest->as_register()); break;
2499       case lir_ushr: __ srl   (left->as_register(), count, dest->as_register()); break;
2500       default: ShouldNotReachHere();
2501     }
2502   } else if (dest->is_double_cpu()) {
2503     count = count & 63; // Java spec
2504     switch (code) {
2505       case lir_shl:  __ sllx  (left->as_pointer_register(), count, dest->as_pointer_register()); break;
2506       case lir_shr:  __ srax  (left->as_pointer_register(), count, dest->as_pointer_register()); break;
2507       case lir_ushr: __ srlx  (left->as_pointer_register(), count, dest->as_pointer_register()); break;
2508       default: ShouldNotReachHere();
2509     }
2510   } else {
2511     ShouldNotReachHere();
2512   }
2513 }
2514 
2515 
2516 void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
2517   assert(op->tmp1()->as_register()  == G1 &&
2518          op->tmp2()->as_register()  == G3 &&
2519          op->tmp3()->as_register()  == G4 &&
2520          op->obj()->as_register()   == O0 &&
2521          op->klass()->as_register() == G5, "must be");
2522   if (op->init_check()) {
2523     __ ldub(op->klass()->as_register(),
2524           in_bytes(InstanceKlass::init_state_offset()),
2525           op->tmp1()->as_register());
2526     add_debug_info_for_null_check_here(op->stub()->info());
2527     __ cmp(op->tmp1()->as_register(), InstanceKlass::fully_initialized);
2528     __ br(Assembler::notEqual, false, Assembler::pn, *op->stub()->entry());
2529     __ delayed()->nop();
2530   }
2531   __ allocate_object(op->obj()->as_register(),
2532                      op->tmp1()->as_register(),
2533                      op->tmp2()->as_register(),
2534                      op->tmp3()->as_register(),
2535                      op->header_size(),
2536                      op->object_size(),
2537                      op->klass()->as_register(),
2538                      *op->stub()->entry());
2539   __ bind(*op->stub()->continuation());
2540   __ verify_oop(op->obj()->as_register());
2541 }
2542 
2543 
2544 void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
2545   assert(op->tmp1()->as_register()  == G1 &&
2546          op->tmp2()->as_register()  == G3 &&
2547          op->tmp3()->as_register()  == G4 &&
2548          op->tmp4()->as_register()  == O1 &&
2549          op->klass()->as_register() == G5, "must be");
2550 
2551   LP64_ONLY( __ signx(op->len()->as_register()); )
2552   if (UseSlowPath ||
2553       (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) ||
2554       (!UseFastNewTypeArray   && (op->type() != T_OBJECT && op->type() != T_ARRAY))) {
2555     __ br(Assembler::always, false, Assembler::pt, *op->stub()->entry());
2556     __ delayed()->nop();
2557   } else {
2558     __ allocate_array(op->obj()->as_register(),
2559                       op->len()->as_register(),
2560                       op->tmp1()->as_register(),
2561                       op->tmp2()->as_register(),
2562                       op->tmp3()->as_register(),
2563                       arrayOopDesc::header_size(op->type()),
2564                       type2aelembytes(op->type()),
2565                       op->klass()->as_register(),
2566                       *op->stub()->entry());
2567   }
2568   __ bind(*op->stub()->continuation());
2569 }
2570 
2571 
2572 void LIR_Assembler::type_profile_helper(Register mdo, int mdo_offset_bias,
2573                                         ciMethodData *md, ciProfileData *data,
2574                                         Register recv, Register tmp1, Label* update_done) {
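       // First pass: look for the receiver in the recorded rows and bump its
       // count.  Second pass: claim the first empty row for this receiver.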
2575   uint i;
2576   for (i = 0; i < VirtualCallData::row_limit(); i++) {
2577     Label next_test;
2578     // See if the receiver is receiver[n].
2579     Address receiver_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) -
2580                           mdo_offset_bias);
2581     __ ld_ptr(receiver_addr, tmp1);
2582     __ verify_oop(tmp1);
2583     __ cmp_and_brx_short(recv, tmp1, Assembler::notEqual, Assembler::pt, next_test);
2584     Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) -
2585                       mdo_offset_bias);
2586     __ ld_ptr(data_addr, tmp1);
2587     __ add(tmp1, DataLayout::counter_increment, tmp1);
2588     __ st_ptr(tmp1, data_addr);
2589     __ ba(*update_done);
2590     __ delayed()->nop();
2591     __ bind(next_test);
2592   }
2593 
2594   // Didn't find receiver; find next empty slot and fill it in
2595   for (i = 0; i < VirtualCallData::row_limit(); i++) {
2596     Label next_test;
2597     Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) -
2598                       mdo_offset_bias);
2599     __ ld_ptr(recv_addr, tmp1);
2600     __ br_notnull_short(tmp1, Assembler::pt, next_test);
2601     __ st_ptr(recv, recv_addr);
2602     __ set(DataLayout::counter_increment, tmp1);
2603     __ st_ptr(tmp1, mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) -
2604               mdo_offset_bias);
2605     __ ba(*update_done);
2606     __ delayed()->nop();
2607     __ bind(next_test);
2608   }
2609 }
2610 
2611 
2612 void LIR_Assembler::setup_md_access(ciMethod* method, int bci,
2613                                     ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias) {
2614   md = method->method_data_or_null();
2615   assert(md != NULL, "Sanity");
2616   data = md->bci_to_data(bci);
2617   assert(data != NULL,       "need data for checkcast");
2618   assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
2619   if (!Assembler::is_simm13(md->byte_offset_of_slot(data, DataLayout::header_offset()) + data->size_in_bytes())) {
2620     // The offset is large so bias the mdo by the base of the slot so
2621     // that the ld can use simm13s to reference the slots of the data
2622     mdo_offset_bias = md->byte_offset_of_slot(data, DataLayout::header_offset());
2623   }
2624 }
2625 
2626 void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
2627   // we always need a stub for the failure case.
2628   CodeStub* stub = op->stub();
2629   Register obj = op->object()->as_register();
2630   Register k_RInfo = op->tmp1()->as_register();
2631   Register klass_RInfo = op->tmp2()->as_register();
2632   Register dst = op->result_opr()->as_register();
2633   Register Rtmp1 = op->tmp3()->as_register();
2634   ciKlass* k = op->klass();
2635 
2636 
2637   if (obj == k_RInfo) {
2638     k_RInfo = klass_RInfo;
2639     klass_RInfo = obj;
2640   }
2641 
2642   ciMethodData* md;
2643   ciProfileData* data;
2644   int mdo_offset_bias = 0;
2645   if (op->should_profile()) {
2646     ciMethod* method = op->profiled_method();
2647     assert(method != NULL, "Should have method");
2648     setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias);
2649 
2650     Label not_null;
2651     __ br_notnull_short(obj, Assembler::pn, not_null);
2652     Register mdo      = k_RInfo;
2653     Register data_val = Rtmp1;
2654     metadata2reg(md->constant_encoding(), mdo);
2655     if (mdo_offset_bias > 0) {
2656       __ set(mdo_offset_bias, data_val);
2657       __ add(mdo, data_val, mdo);
2658     }
2659     Address flags_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias);
2660     __ ldub(flags_addr, data_val);
2661     __ or3(data_val, BitData::null_seen_byte_constant(), data_val);
2662     __ stb(data_val, flags_addr);
2663     __ ba(*obj_is_null);
2664     __ delayed()->nop();
2665     __ bind(not_null);
2666   } else {
2667     __ br_null(obj, false, Assembler::pn, *obj_is_null);
2668     __ delayed()->nop();
2669   }
2670 
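       // When profiling, failing and succeeding checks first branch to local
       // profile-update blocks, which then jump to the caller-supplied labels.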
2671   Label profile_cast_failure, profile_cast_success;
2672   Label *failure_target = op->should_profile() ? &profile_cast_failure : failure;
2673   Label *success_target = op->should_profile() ? &profile_cast_success : success;
2674 
2675   // patching may screw with our temporaries on sparc,
2676   // so let's do it before loading the class
2677   if (k->is_loaded()) {
2678     metadata2reg(k->constant_encoding(), k_RInfo);
2679   } else {
2680     klass2reg_with_patching(k_RInfo, op->info_for_patch());
2681   }
2682   assert(obj != k_RInfo, "must be different");
2683 
2684   // get object class
2685   // not a safepoint as obj null check happens earlier
2686   __ load_klass(obj, klass_RInfo);
2687   if (op->fast_check()) {
2688     assert_different_registers(klass_RInfo, k_RInfo);
2689     __ cmp(k_RInfo, klass_RInfo);
2690     __ brx(Assembler::notEqual, false, Assembler::pt, *failure_target);
2691     __ delayed()->nop();
2692   } else {
2693     bool need_slow_path = true;
2694     if (k->is_loaded()) {
2695       if ((int) k->super_check_offset() != in_bytes(Klass::secondary_super_cache_offset()))
2696         need_slow_path = false;
2697       // perform the fast part of the checking logic
2698       __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
2699                                        (need_slow_path ? success_target : NULL),
2700                                        failure_target, NULL,
2701                                        RegisterOrConstant(k->super_check_offset()));
2702     } else {
2703       // perform the fast part of the checking logic
2704       __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target,
2705                                        failure_target, NULL);
2706     }
2707     if (need_slow_path) {
2708       // call out-of-line instance of __ check_klass_subtype_slow_path(...):
2709       assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
2710       __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
2711       __ delayed()->nop();
2712       __ cmp(G3, 0);
2713       __ br(Assembler::equal, false, Assembler::pn, *failure_target);
2714       __ delayed()->nop();
2715       // Fall through to success case
2716     }
2717   }
2718 
2719   if (op->should_profile()) {
2720     Register mdo  = klass_RInfo, recv = k_RInfo, tmp1 = Rtmp1;
2721     assert_different_registers(obj, mdo, recv, tmp1);
2722     __ bind(profile_cast_success);
2723     metadata2reg(md->constant_encoding(), mdo);
2724     if (mdo_offset_bias > 0) {
2725       __ set(mdo_offset_bias, tmp1);
2726       __ add(mdo, tmp1, mdo);
2727     }
2728     __ load_klass(obj, recv);
2729     type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, success);
2730     // Jump over the failure case
2731     __ ba(*success);
2732     __ delayed()->nop();
2733     // Cast failure case
2734     __ bind(profile_cast_failure);
2735     metadata2reg(md->constant_encoding(), mdo);
2736     if (mdo_offset_bias > 0) {
2737       __ set(mdo_offset_bias, tmp1);
2738       __ add(mdo, tmp1, mdo);
2739     }
2740     Address data_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
2741     __ ld_ptr(data_addr, tmp1);
2742     __ sub(tmp1, DataLayout::counter_increment, tmp1);
2743     __ st_ptr(tmp1, data_addr);
2744     __ ba(*failure);
2745     __ delayed()->nop();
2746   }
2747   __ ba(*success);
2748   __ delayed()->nop();
2749 }
2750 
2751 void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
2752   LIR_Code code = op->code();
2753   if (code == lir_store_check) {
2754     Register value = op->object()->as_register();
2755     Register array = op->array()->as_register();
2756     Register k_RInfo = op->tmp1()->as_register();
2757     Register klass_RInfo = op->tmp2()->as_register();
2758     Register Rtmp1 = op->tmp3()->as_register();
2759 
2760     __ verify_oop(value);
2761     CodeStub* stub = op->stub();
2762     // check if it needs to be profiled
2763     ciMethodData* md;
2764     ciProfileData* data;
2765     int mdo_offset_bias = 0;
2766     if (op->should_profile()) {
2767       ciMethod* method = op->profiled_method();
2768       assert(method != NULL, "Should have method");
2769       setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias);
2770     }
2771     Label profile_cast_success, profile_cast_failure, done;
2772     Label *success_target = op->should_profile() ? &profile_cast_success : &done;
2773     Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
2774 
2775     if (op->should_profile()) {
2776       Label not_null;
2777       __ br_notnull_short(value, Assembler::pn, not_null);
2778       Register mdo      = k_RInfo;
2779       Register data_val = Rtmp1;
2780       metadata2reg(md->constant_encoding(), mdo);
2781       if (mdo_offset_bias > 0) {
2782         __ set(mdo_offset_bias, data_val);
2783         __ add(mdo, data_val, mdo);
2784       }
2785       Address flags_addr(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias);
2786       __ ldub(flags_addr, data_val);
2787       __ or3(data_val, BitData::null_seen_byte_constant(), data_val);
2788       __ stb(data_val, flags_addr);
2789       __ ba_short(done);
2790       __ bind(not_null);
2791     } else {
2792       __ br_null_short(value, Assembler::pn, done);
2793     }
2794     add_debug_info_for_null_check_here(op->info_for_exception());
2795     __ load_klass(array, k_RInfo);
2796     __ load_klass(value, klass_RInfo);
2797 
2798     // get instance klass
2799     __ ld_ptr(Address(k_RInfo, ObjArrayKlass::element_klass_offset()), k_RInfo);
2800     // perform the fast part of the checking logic
2801     __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target, failure_target, NULL);
2802 
2803     // call out-of-line instance of __ check_klass_subtype_slow_path(...):
2804     assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
2805     __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
2806     __ delayed()->nop();
2807     __ cmp(G3, 0);
2808     __ br(Assembler::equal, false, Assembler::pn, *failure_target);
2809     __ delayed()->nop();
2810     // fall through to the success case
2811 
2812     if (op->should_profile()) {
2813       Register mdo  = klass_RInfo, recv = k_RInfo, tmp1 = Rtmp1;
2814       assert_different_registers(value, mdo, recv, tmp1);
2815       __ bind(profile_cast_success);
2816       metadata2reg(md->constant_encoding(), mdo);
2817       if (mdo_offset_bias > 0) {
2818         __ set(mdo_offset_bias, tmp1);
2819         __ add(mdo, tmp1, mdo);
2820       }
2821       __ load_klass(value, recv);
2822       type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &done);
2823       __ ba_short(done);
2824       // Cast failure case
2825       __ bind(profile_cast_failure);
2826       metadata2reg(md->constant_encoding(), mdo);
2827       if (mdo_offset_bias > 0) {
2828         __ set(mdo_offset_bias, tmp1);
2829         __ add(mdo, tmp1, mdo);
2830       }
2831       Address data_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
2832       __ ld_ptr(data_addr, tmp1);
2833       __ sub(tmp1, DataLayout::counter_increment, tmp1);
2834       __ st_ptr(tmp1, data_addr);
2835       __ ba(*stub->entry());
2836       __ delayed()->nop();
2837     }
2838     __ bind(done);
2839   } else if (code == lir_checkcast) {
2840     Register obj = op->object()->as_register();
2841     Register dst = op->result_opr()->as_register();
2842     Label success;
2843     emit_typecheck_helper(op, &success, op->stub()->entry(), &success);
2844     __ bind(success);
2845     __ mov(obj, dst);
2846   } else if (code == lir_instanceof) {
2847     Register obj = op->object()->as_register();
2848     Register dst = op->result_opr()->as_register();
2849     Label success, failure, done;
2850     emit_typecheck_helper(op, &success, &failure, &failure);
2851     __ bind(failure);
2852     __ set(0, dst);
2853     __ ba_short(done);
2854     __ bind(success);
2855     __ set(1, dst);
2856     __ bind(done);
2857   } else {
2858     ShouldNotReachHere();
2859   }
2860 
2861 }
2862 
2863 
2864 void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
2865   if (op->code() == lir_cas_long) {
2866     assert(VM_Version::supports_cx8(), "wrong machine");
2867     Register addr = op->addr()->as_pointer_register();
2868     Register cmp_value_lo = op->cmp_value()->as_register_lo();
2869     Register cmp_value_hi = op->cmp_value()->as_register_hi();
2870     Register new_value_lo = op->new_value()->as_register_lo();
2871     Register new_value_hi = op->new_value()->as_register_hi();
2872     Register t1 = op->tmp1()->as_register();
2873     Register t2 = op->tmp2()->as_register();
2874 #ifdef _LP64
2875     __ mov(cmp_value_lo, t1);
2876     __ mov(new_value_lo, t2);
2877     // perform the compare and swap operation
2878     __ casx(addr, t1, t2);
2879     // generate condition code - if the swap succeeded, t2 ("new value" reg) was
2880     // overwritten with the original value in "addr" and will be equal to t1.
2881     __ cmp(t1, t2);
2882 #else
2883     // move high and low halves of long values into single registers
2884     __ sllx(cmp_value_hi, 32, t1);         // shift high half into temp reg
2885     __ srl(cmp_value_lo, 0, cmp_value_lo); // clear upper 32 bits of low half
2886     __ or3(t1, cmp_value_lo, t1);          // t1 holds 64-bit compare value
2887     __ sllx(new_value_hi, 32, t2);
2888     __ srl(new_value_lo, 0, new_value_lo);
2889     __ or3(t2, new_value_lo, t2);          // t2 holds 64-bit value to swap
2890     // perform the compare and swap operation
2891     __ casx(addr, t1, t2);
2892     // generate condition code - if the swap succeeded, t2 ("new value" reg) was
2893     // overwritten with the original value in "addr" and will be equal to t1.
2894     // Produce icc flag for 32bit.
2895     __ sub(t1, t2, t2);
2896     __ srlx(t2, 32, t1);
2897     __ orcc(t2, t1, G0);
2898 #endif
2899   } else if (op->code() == lir_cas_int || op->code() == lir_cas_obj) {
2900     Register addr = op->addr()->as_pointer_register();
2901     Register cmp_value = op->cmp_value()->as_register();
2902     Register new_value = op->new_value()->as_register();
2903     Register t1 = op->tmp1()->as_register();
2904     Register t2 = op->tmp2()->as_register();
2905     __ mov(cmp_value, t1);
2906     __ mov(new_value, t2);
2907     if (op->code() == lir_cas_obj) {
2908       if (UseCompressedOops) {
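             // cas operates on 32-bit words, so compare and exchange the
             // narrow (encoded) forms of the oops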
2909         __ encode_heap_oop(t1);
2910         __ encode_heap_oop(t2);
2911         __ cas(addr, t1, t2);
2912       } else {
2913         __ cas_ptr(addr, t1, t2);
2914       }
2915     } else {
2916       __ cas(addr, t1, t2);
2917     }
2918     __ cmp(t1, t2);
2919   } else {
2920     Unimplemented();
2921   }
2922 }
2923 
2924 void LIR_Assembler::set_24bit_FPU() {
2925   Unimplemented();
2926 }
2927 
2928 
2929 void LIR_Assembler::reset_FPU() {
2930   Unimplemented();
2931 }
2932 
2933 
2934 void LIR_Assembler::breakpoint() {
2935   __ breakpoint_trap();
2936 }
2937 
2938 
2939 void LIR_Assembler::push(LIR_Opr opr) {
2940   Unimplemented();
2941 }
2942 
2943 
2944 void LIR_Assembler::pop(LIR_Opr opr) {
2945   Unimplemented();
2946 }
2947 
2948 
2949 void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst_opr) {
2950   Address mon_addr = frame_map()->address_for_monitor_lock(monitor_no);
2951   Register dst = dst_opr->as_register();
2952   Register reg = mon_addr.base();
2953   int offset = mon_addr.disp();
2954   // compute pointer to BasicLock
2955   if (mon_addr.is_simm13()) {
2956     __ add(reg, offset, dst);
2957   } else {
2958     __ set(offset, dst);
2959     __ add(dst, reg, dst);
2960   }
2961 }
2962 
2963 
2964 void LIR_Assembler::emit_lock(LIR_OpLock* op) {
2965   Register obj = op->obj_opr()->as_register();
2966   Register hdr = op->hdr_opr()->as_register();
2967   Register lock = op->lock_opr()->as_register();
2968 
2969   // obj may not be an oop
2970   if (op->code() == lir_lock) {
2971     MonitorEnterStub* stub = (MonitorEnterStub*)op->stub();
2972     if (UseFastLocking) {
2973       assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
2974       // add debug info for NullPointerException only if one is possible
2975       if (op->info() != NULL) {
2976         add_debug_info_for_null_check_here(op->info());
2977       }
2978       __ lock_object(hdr, obj, lock, op->scratch_opr()->as_register(), *op->stub()->entry());
2979     } else {
2980       // always do slow locking
2981       // note: the slow locking code could be inlined here, however if we use
2982       //       slow locking, speed doesn't matter anyway and this solution is
2983       //       simpler and requires less duplicated code - additionally, the
2984       //       slow locking code is the same in either case which simplifies
2985       //       debugging
2986       __ br(Assembler::always, false, Assembler::pt, *op->stub()->entry());
2987       __ delayed()->nop();
2988     }
2989   } else {
2990     assert (op->code() == lir_unlock, "Invalid code, expected lir_unlock");
2991     if (UseFastLocking) {
2992       assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
2993       __ unlock_object(hdr, obj, lock, *op->stub()->entry());
2994     } else {
2995       // always do slow unlocking
2996       // note: the slow unlocking code could be inlined here, however if we use
2997       //       slow unlocking, speed doesn't matter anyway and this solution is
2998       //       simpler and requires less duplicated code - additionally, the
2999       //       slow unlocking code is the same in either case which simplifies
3000       //       debugging
3001       __ br(Assembler::always, false, Assembler::pt, *op->stub()->entry());
3002       __ delayed()->nop();
3003     }
3004   }
3005   __ bind(*op->stub()->continuation());
3006 }
3007 
3008 
3009 void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
3010   ciMethod* method = op->profiled_method();
3011   int bci          = op->profiled_bci();
3012   ciMethod* callee = op->profiled_callee();
3013 
3014   // Update counter for all call types
3015   ciMethodData* md = method->method_data_or_null();
3016   assert(md != NULL, "Sanity");
3017   ciProfileData* data = md->bci_to_data(bci);
3018   assert(data->is_CounterData(), "need CounterData for calls");
3019   assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
3020   Register mdo  = op->mdo()->as_register();
3021 #ifdef _LP64
3022   assert(op->tmp1()->is_double_cpu(), "tmp1 must be allocated");
3023   Register tmp1 = op->tmp1()->as_register_lo();
3024 #else
3025   assert(op->tmp1()->is_single_cpu(), "tmp1 must be allocated");
3026   Register tmp1 = op->tmp1()->as_register();
3027 #endif
3028   metadata2reg(md->constant_encoding(), mdo);
3029   int mdo_offset_bias = 0;
3030   if (!Assembler::is_simm13(md->byte_offset_of_slot(data, CounterData::count_offset()) +
3031                             data->size_in_bytes())) {
3032     // The offset is large, so bias the mdo by the base of the slot so
3033     // that the ld can use a simm13 displacement to reference the data slots
3034     mdo_offset_bias = md->byte_offset_of_slot(data, CounterData::count_offset());
3035     __ set(mdo_offset_bias, O7);
3036     __ add(mdo, O7, mdo);
3037   }
3038 
3039   Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
3040   Bytecodes::Code bc = method->java_code_at_bci(bci);
3041   const bool callee_is_static = callee->is_loaded() && callee->is_static();
3042   // Perform additional virtual call profiling for invokevirtual and
3043   // invokeinterface bytecodes
3044   if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
3045       !callee_is_static &&  // required for optimized MH invokes
3046       C1ProfileVirtualCalls) {
3047     assert(op->recv()->is_single_cpu(), "recv must be allocated");
3048     Register recv = op->recv()->as_register();
3049     assert_different_registers(mdo, tmp1, recv);
3050     assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
3051     ciKlass* known_klass = op->known_holder();
3052     if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
3053       // We know the type that will be seen at this call site; we can
3054       // statically update the MethodData* rather than needing to do
3055       // dynamic tests on the receiver type
3056 
3057       // NOTE: we should probably put a lock around this search to
3058       // avoid collisions between concurrent compilations
3059       ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
3060       uint i;
3061       for (i = 0; i < VirtualCallData::row_limit(); i++) {
3062         ciKlass* receiver = vc_data->receiver(i);
3063         if (known_klass->equals(receiver)) {
3064           Address data_addr(mdo, md->byte_offset_of_slot(data,
3065                                                          VirtualCallData::receiver_count_offset(i)) -
3066                             mdo_offset_bias);
3067           __ ld_ptr(data_addr, tmp1);
3068           __ add(tmp1, DataLayout::counter_increment, tmp1);
3069           __ st_ptr(tmp1, data_addr);
3070           return;
3071         }
3072       }
3073 
3074       // Receiver type not found in profile data; select an empty slot
3075 
3076       // Note that this is less efficient than it should be because the
3077       // generated code always writes the receiver klass into the
3078       // VirtualCallData row rather than only on the first execution
3079       for (i = 0; i < VirtualCallData::row_limit(); i++) {
3080         ciKlass* receiver = vc_data->receiver(i);
3081         if (receiver == NULL) {
3082           Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)) -
3083                             mdo_offset_bias);
3084           metadata2reg(known_klass->constant_encoding(), tmp1);
3085           __ st_ptr(tmp1, recv_addr);
3086           Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) -
3087                             mdo_offset_bias);
3088           __ ld_ptr(data_addr, tmp1);
3089           __ add(tmp1, DataLayout::counter_increment, tmp1);
3090           __ st_ptr(tmp1, data_addr);
3091           return;
3092         }
3093       }
3094     } else {
3095       __ load_klass(recv, recv);
3096       Label update_done;
3097       type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &update_done);
3098       // Receiver did not match any saved receiver and there is no empty row for it.
3099       // Increment total counter to indicate polymorphic case.
3100       __ ld_ptr(counter_addr, tmp1);
3101       __ add(tmp1, DataLayout::counter_increment, tmp1);
3102       __ st_ptr(tmp1, counter_addr);
3103 
3104       __ bind(update_done);
3105     }
3106   } else {
3107     // Static call
3108     __ ld_ptr(counter_addr, tmp1);
3109     __ add(tmp1, DataLayout::counter_increment, tmp1);
3110     __ st_ptr(tmp1, counter_addr);
3111   }
3112 }
3113 
3114 void LIR_Assembler::align_backward_branch_target() {
3115   __ align(OptoLoopAlignment);
3116 }
3117 
3118 
3119 void LIR_Assembler::emit_delay(LIR_OpDelay* op) {
3120   // Make sure we are expecting a delay. This has the side effect of
3121   // clearing the delay state, so we can use _masm instead of
3122   // _masm->delayed() to do the code generation for the instruction
3123   // that fills the slot.
3124   __ delayed();
3125 
3126   // make sure we only emit one instruction
3127   int offset = code_offset();
3128   op->delay_op()->emit_code(this);
3129 #ifdef ASSERT
3130   if (code_offset() - offset != NativeInstruction::nop_instruction_size) {
3131     op->delay_op()->print();
3132   }
3133   assert(code_offset() - offset == NativeInstruction::nop_instruction_size,
3134          "only one instruction can go in a delay slot");
3135 #endif
3136 
3137   // we may also be emitting the call info for the instruction
3138   // whose delay slot we occupy.
3139   CodeEmitInfo* call_info = op->call_info();
3140   if (call_info) {
3141     add_call_info(code_offset(), call_info);
3142   }
3143 
3144   if (VerifyStackAtCalls) {
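         // check that FP - SP still matches the frame size allocated in
         // the prologue; trap if it does not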
3145     _masm->sub(FP, SP, O7);
3146     _masm->cmp(O7, initial_frame_size_in_bytes());
3147     _masm->trap(Assembler::notEqual, Assembler::ptr_cc, G0, ST_RESERVED_FOR_USER_0+2 );
3148   }
3149 }
3150 
3151 
3152 void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
3153   assert(left->is_register(), "can only handle registers");
3154 
3155   if (left->is_single_cpu()) {
3156     __ neg(left->as_register(), dest->as_register());
3157   } else if (left->is_single_fpu()) {
3158     __ fneg(FloatRegisterImpl::S, left->as_float_reg(), dest->as_float_reg());
3159   } else if (left->is_double_fpu()) {
3160     __ fneg(FloatRegisterImpl::D, left->as_double_reg(), dest->as_double_reg());
3161   } else {
3162     assert (left->is_double_cpu(), "Must be a long");
3163     Register Rlow = left->as_register_lo();
3164     Register Rhi = left->as_register_hi();
3165 #ifdef _LP64
3166     __ sub(G0, Rlow, dest->as_register_lo());
3167 #else
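         // 32-bit: negate the long as 0 - value, propagating the borrow
         // from the low word into the high word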
3168     __ subcc(G0, Rlow, dest->as_register_lo());
3169     __ subc (G0, Rhi,  dest->as_register_hi());
3170 #endif
3171   }
3172 }
3173 
3174 
3175 void LIR_Assembler::fxch(int i) {
3176   Unimplemented();
3177 }
3178 
3179 void LIR_Assembler::fld(int i) {
3180   Unimplemented();
3181 }
3182 
3183 void LIR_Assembler::ffree(int i) {
3184   Unimplemented();
3185 }
3186 
3187 void LIR_Assembler::rt_call(LIR_Opr result, address dest,
3188                             const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
3189 
3190   // if tmp is invalid, then the function being called doesn't destroy the thread
3191   if (tmp->is_valid()) {
3192     __ save_thread(tmp->as_register());
3193   }
3194   __ call(dest, relocInfo::runtime_call_type);
3195   __ delayed()->nop();
3196   if (info != NULL) {
3197     add_call_info_here(info);
3198   }
3199   if (tmp->is_valid()) {
3200     __ restore_thread(tmp->as_register());
3201   }
3202 
3203 #ifdef ASSERT
3204   __ verify_thread();
3205 #endif // ASSERT
3206 }
3207 
3208 
3209 void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
3210 #ifdef _LP64
3211   ShouldNotReachHere();
3212 #endif
3213 
3214   NEEDS_CLEANUP;
3215   if (type == T_LONG) {
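         // on 32-bit SPARC a volatile long must be accessed with a single
         // 64-bit load or store to remain atomic, so the value is staged
         // through the G4/G5 register pair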
3216     LIR_Address* mem_addr = dest->is_address() ? dest->as_address_ptr() : src->as_address_ptr();
3217 
3218     // (extended to allow indexed as well as constant displaced for JSR-166)
3219     Register idx = noreg; // contains either constant offset or index
3220 
3221     int disp = mem_addr->disp();
3222     if (mem_addr->index() == LIR_OprFact::illegalOpr) {
3223       if (!Assembler::is_simm13(disp)) {
3224         idx = O7;
3225         __ set(disp, idx);
3226       }
3227     } else {
3228       assert(disp == 0, "not both indexed and disp");
3229       idx = mem_addr->index()->as_register();
3230     }
3231 
3232     int null_check_offset = -1;
3233 
3234     Register base = mem_addr->base()->as_register();
3235     if (src->is_register() && dest->is_address()) {
3236       // G4 is high half, G5 is low half
3237       if (VM_Version::v9_instructions_work()) {
3238         // clear the top bits of G5, and scale up G4
3239         __ srl (src->as_register_lo(),  0, G5);
3240         __ sllx(src->as_register_hi(), 32, G4);
3241         // combine the two halves into the 64 bits of G4
3242         __ or3(G4, G5, G4);
3243         null_check_offset = __ offset();
3244         if (idx == noreg) {
3245           __ stx(G4, base, disp);
3246         } else {
3247           __ stx(G4, base, idx);
3248         }
3249       } else {
3250         __ mov (src->as_register_hi(), G4);
3251         __ mov (src->as_register_lo(), G5);
3252         null_check_offset = __ offset();
3253         if (idx == noreg) {
3254           __ std(G4, base, disp);
3255         } else {
3256           __ std(G4, base, idx);
3257         }
3258       }
3259     } else if (src->is_address() && dest->is_register()) {
3260       null_check_offset = __ offset();
3261       if (VM_Version::v9_instructions_work()) {
3262         if (idx == noreg) {
3263           __ ldx(base, disp, G5);
3264         } else {
3265           __ ldx(base, idx, G5);
3266         }
3267         __ srax(G5, 32, dest->as_register_hi()); // fetch the high half into hi
3268         __ mov (G5, dest->as_register_lo());     // copy low half into lo
3269       } else {
3270         if (idx == noreg) {
3271           __ ldd(base, disp, G4);
3272         } else {
3273           __ ldd(base, idx, G4);
3274         }
3275         // G4 is high half, G5 is low half
3276         __ mov (G4, dest->as_register_hi());
3277         __ mov (G5, dest->as_register_lo());
3278       }
3279     } else {
3280       Unimplemented();
3281     }
3282     if (info != NULL) {
3283       add_debug_info_for_null_check(null_check_offset, info);
3284     }
3285 
3286   } else {
3287     // use normal move for all other volatiles since they don't need
3288     // special handling to remain atomic.
3289     move_op(src, dest, type, lir_patch_none, info, false, false, false);
3290   }
3291 }
3292 
3293 void LIR_Assembler::membar() {
3294   // only StoreLoad membars are ever explicitly needed on SPARC in TSO mode
3295   __ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad) );
3296 }
3297 
3298 void LIR_Assembler::membar_acquire() {
3299   // no-op on TSO
3300 }
3301 
3302 void LIR_Assembler::membar_release() {
3303   // no-op on TSO
3304 }
3305 
3306 void LIR_Assembler::membar_loadload() {
3307   // no-op
3308   //__ membar(Assembler::Membar_mask_bits(Assembler::loadload));
3309 }
3310 
3311 void LIR_Assembler::membar_storestore() {
3312   // no-op
3313   //__ membar(Assembler::Membar_mask_bits(Assembler::storestore));
3314 }
3315 
3316 void LIR_Assembler::membar_loadstore() {
3317   // no-op
3318   //__ membar(Assembler::Membar_mask_bits(Assembler::loadstore));
3319 }
3320 
3321 void LIR_Assembler::membar_storeload() {
3322   __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
3323 }
3324 
3325 
3326 // Pack two sequential registers containing 32 bit values
3327 // into a single 64 bit register.
3328 // src and src->successor() are packed into dst
3329 // src and dst may be the same register.
3330 // Note: src is destroyed
3331 void LIR_Assembler::pack64(LIR_Opr src, LIR_Opr dst) {
3332   Register rs = src->as_register();
3333   Register rd = dst->as_register_lo();
3334   __ sllx(rs, 32, rs);
3335   __ srl(rs->successor(), 0, rs->successor());
3336   __ or3(rs, rs->successor(), rd);
3337 }
3338 
3339 // Unpack a 64 bit value in a register into
3340 // two sequential registers.
3341 // src is unpacked into dst and dst->successor()
3342 void LIR_Assembler::unpack64(LIR_Opr src, LIR_Opr dst) {
3343   Register rs = src->as_register_lo();
3344   Register rd = dst->as_register_hi();
3345   assert_different_registers(rs, rd, rd->successor());
3346   __ srlx(rs, 32, rd);
3347   __ srl (rs,  0, rd->successor());
3348 }
3349 
3350 
3351 void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
3352   LIR_Address* addr = addr_opr->as_address_ptr();
3353   assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1 && Assembler::is_simm13(addr->disp()), "can't handle complex addresses yet");
3354 
3355   __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register());
3356 }
3357 
3358 
3359 void LIR_Assembler::get_thread(LIR_Opr result_reg) {
3360   assert(result_reg->is_register(), "check");
3361   __ mov(G2_thread, result_reg->as_register());
3362 }
3363 
3364 #ifdef ASSERT
3365 // emit run-time assertion
3366 void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
3367   assert(op->code() == lir_assert, "must be");
3368 
3369   if (op->in_opr1()->is_valid()) {
3370     assert(op->in_opr2()->is_valid(), "both operands must be valid");
3371     comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op);
3372   } else {
3373     assert(op->in_opr2()->is_illegal(), "both operands must be illegal");
3374     assert(op->condition() == lir_cond_always, "no other conditions allowed");
3375   }
3376 
3377   Label ok;
3378   if (op->condition() != lir_cond_always) {
3379     Assembler::Condition acond;
3380     switch (op->condition()) {
3381       case lir_cond_equal:        acond = Assembler::equal;                break;
3382       case lir_cond_notEqual:     acond = Assembler::notEqual;             break;
3383       case lir_cond_less:         acond = Assembler::less;                 break;
3384       case lir_cond_lessEqual:    acond = Assembler::lessEqual;            break;
3385       case lir_cond_greaterEqual: acond = Assembler::greaterEqual;         break;
3386       case lir_cond_greater:      acond = Assembler::greater;              break;
3387       case lir_cond_aboveEqual:   acond = Assembler::greaterEqualUnsigned; break;
3388       case lir_cond_belowEqual:   acond = Assembler::lessEqualUnsigned;    break;
3389       default:                         ShouldNotReachHere();
3390     };
3391     __ br(acond, false, Assembler::pt, ok);
3392     __ delayed()->nop();
3393   }
3394   if (op->halt()) {
3395     const char* str = __ code_string(op->msg());
3396     __ stop(str);
3397   } else {
3398     breakpoint();
3399   }
3400   __ bind(ok);
3401 }
3402 #endif
3403 
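     // Try to fill SPARC branch and call delay slots with a preceding
     // single-instruction LIR op; otherwise insert an explicit delay op
     // holding a nop.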
3404 void LIR_Assembler::peephole(LIR_List* lir) {
3405   LIR_OpList* inst = lir->instructions_list();
3406   for (int i = 0; i < inst->length(); i++) {
3407     LIR_Op* op = inst->at(i);
3408     switch (op->code()) {
3409       case lir_cond_float_branch:
3410       case lir_branch: {
3411         LIR_OpBranch* branch = op->as_OpBranch();
3412         assert(branch->info() == NULL, "shouldn't be state on branches anymore");
3413         LIR_Op* delay_op = NULL;
3414         // We'd like to be able to pull following instructions into
3415         // this slot, but we don't know enough to do it safely yet, so
3416         // we only optimize block-to-block control flow.
3417         if (LIRFillDelaySlots && branch->block()) {
3418           LIR_Op* prev = inst->at(i - 1);
3419           if (prev && LIR_Assembler::is_single_instruction(prev) && prev->info() == NULL) {
3420             // swap previous instruction into delay slot
3421             inst->at_put(i - 1, op);
3422             inst->at_put(i, new LIR_OpDelay(prev, op->info()));
3423 #ifndef PRODUCT
3424             if (LIRTracePeephole) {
3425               tty->print_cr("delayed");
3426               inst->at(i - 1)->print();
3427               inst->at(i)->print();
3428               tty->cr();
3429             }
3430 #endif
3431             continue;
3432           }
3433         }
3434 
3435         if (!delay_op) {
3436           delay_op = new LIR_OpDelay(new LIR_Op0(lir_nop), NULL);
3437         }
3438         inst->insert_before(i + 1, delay_op);
3439         break;
3440       }
3441       case lir_static_call:
3442       case lir_virtual_call:
3443       case lir_icvirtual_call:
3444       case lir_optvirtual_call:
3445       case lir_dynamic_call: {
3446         LIR_Op* prev = inst->at(i - 1);
3447         if (LIRFillDelaySlots && prev && prev->code() == lir_move && prev->info() == NULL &&
3448             (op->code() != lir_virtual_call ||
3449              !prev->result_opr()->is_single_cpu() ||
3450              prev->result_opr()->as_register() != O0) &&
3451             LIR_Assembler::is_single_instruction(prev)) {
3452           // Only moves without info can be put into the delay slot.
3453           // Also don't allow the setup of the receiver in the delay
3454           // slot for vtable calls.
3455           inst->at_put(i - 1, op);
3456           inst->at_put(i, new LIR_OpDelay(prev, op->info()));
3457 #ifndef PRODUCT
3458           if (LIRTracePeephole) {
3459             tty->print_cr("delayed");
3460             inst->at(i - 1)->print();
3461             inst->at(i)->print();
3462             tty->cr();
3463           }
3464 #endif
3465         } else {
3466           LIR_Op* delay_op = new LIR_OpDelay(new LIR_Op0(lir_nop), op->as_OpJavaCall()->info());
3467           inst->insert_before(i + 1, delay_op);
3468           i++;
3469         }
3470 
3471 #if defined(TIERED) && !defined(_LP64)
3472         // Fix up the return value from G1 to O0/O1 for long returns.
3473         // It's done here instead of in LIRGenerator because of the
3474         // mismatch between the single-register and double-register
3475         // calling conventions.
3476         LIR_OpJavaCall* callop = op->as_OpJavaCall();
3477         if (callop->result_opr() == FrameMap::out_long_opr) {
3478           LIR_OpJavaCall* call;
3479           LIR_OprList* arguments = new LIR_OprList(callop->arguments()->length());
3480           for (int a = 0; a < callop->arguments()->length(); a++) {
3481             arguments->append(callop->arguments()->at(a));
3482           }
3483           if (op->code() == lir_virtual_call) {
3484             call = new LIR_OpJavaCall(op->code(), callop->method(), callop->receiver(), FrameMap::g1_long_single_opr,
3485                                       callop->vtable_offset(), arguments, callop->info());
3486           } else {
3487             call = new LIR_OpJavaCall(op->code(), callop->method(), callop->receiver(), FrameMap::g1_long_single_opr,
3488                                       callop->addr(), arguments, callop->info());
3489           }
3490           inst->at_put(i - 1, call);
3491           inst->insert_before(i + 1, new LIR_Op1(lir_unpack64, FrameMap::g1_long_single_opr, callop->result_opr(),
3492                                                  T_LONG, lir_patch_none, NULL));
3493         }
3494 #endif
3495         break;
3496       }
3497     }
3498   }
3499 }
3500 
3501 void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) {
3502   LIR_Address* addr = src->as_address_ptr();
3503 
3504   assert(data == dest, "swap uses only 2 operands");
3505   assert (code == lir_xchg, "no xadd on sparc");
3506 
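       // lir_xchg is implemented with the 32-bit swap instruction, so in
       // 64-bit mode oops are exchanged in their compressed form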
3507   if (data->type() == T_INT) {
3508     __ swap(as_Address(addr), data->as_register());
3509   } else if (data->is_oop()) {
3510     Register obj = data->as_register();
3511     Register narrow = tmp->as_register();
3512 #ifdef _LP64
3513     assert(UseCompressedOops, "swap is 32bit only");
3514     __ encode_heap_oop(obj, narrow);
3515     __ swap(as_Address(addr), narrow);
3516     __ decode_heap_oop(narrow, obj);
3517 #else
3518     __ swap(as_Address(addr), obj);
3519 #endif
3520   } else {
3521     ShouldNotReachHere();
3522   }
3523 }
3524 
3525 #undef __