1676 assert(Universe::heap() != NULL, "java heap should be initialized");
1677 st->print_cr("\tLDUW [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass");
1678 st->print_cr("\tSLL R_G5,3,R_G5");
1679 if (Universe::narrow_oop_base() != NULL)
1680 st->print_cr("\tADD R_G5,R_G6_heap_base,R_G5");
1681 } else {
1682 st->print_cr("\tLDX [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
1683 }
1684 st->print_cr("\tCMP R_G5,R_G3" );
1685 st->print ("\tTne xcc,R_G0+ST_RESERVED_FOR_USER_0+2");
1686 #else // _LP64
1687 st->print_cr("\tLDUW [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
1688 st->print_cr("\tCMP R_G5,R_G3" );
1689 st->print ("\tTne icc,R_G0+ST_RESERVED_FOR_USER_0+2");
1690 #endif // _LP64
1691 }
1692 #endif
1693
1694 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
// Emits the unverified entry point (UEP): load the receiver's klass, compare
// it with the expected klass held in the inline-cache register, and trap on a
// mismatch (the trap vectors to the IC-miss handler).
1695 MacroAssembler _masm(&cbuf);
1696 Label L; // NOTE(review): 'L' is never used in this method — candidate for removal
1697 Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
1698 Register temp_reg = G3; // scratch register for the receiver's klass
1699 assert( G5_ic_reg != temp_reg, "conflicting registers" );
1700 
1701 // Load klass from receiver
1702 __ load_klass(O0, temp_reg);
1703 // Compare against expected klass
1704 __ cmp(temp_reg, G5_ic_reg);
1705 // Branch to miss code, checks xcc or icc depending
1706 __ trap(Assembler::notEqual, Assembler::ptr_cc, G0, ST_RESERVED_FOR_USER_0+2);
1707 }
1708 
1709 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
// The UEP has no fixed size here; defer to the generic computed-size path.
1710 return MachNode::size(ra_);
1711 }
1712
1713
1714 //=============================================================================
1715
1716 uint size_exception_handler() {
2298 // CMP $src1,$src2
2299 emit3( cbuf, Assembler::arith_op, 0, Assembler::subcc_op3, $src1$$reg, 0, $src2$$reg );
2300 // blt,a,pn done
2301 emit2_19( cbuf, Assembler::branch_op, 1/*annul*/, Assembler::less , Assembler::bp_op2, Assembler::xcc, 0/*predict not taken*/, 5 );
2302 // mov dst,-1 in delay slot
2303 emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0, -1 );
2304 // bgt,a,pn done
2305 emit2_19( cbuf, Assembler::branch_op, 1/*annul*/, Assembler::greater, Assembler::bp_op2, Assembler::xcc, 0/*predict not taken*/, 3 );
2306 // mov dst,1 in delay slot
2307 emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0, 1 );
2308 // CLR $dst
2309 emit3( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3 , 0, 0, 0 );
2310 %}
2311
// Calls the partial-subtype-check stub via a runtime-call relocation; the
// stub's outcome is reflected in the condition codes consumed by the matching
// instruct (see partialSubtypeCheck_vs_zero).
2312 enc_class enc_PartialSubtypeCheck() %{
2313 MacroAssembler _masm(&cbuf);
2314 __ call(StubRoutines::Sparc::partial_subtype_check(), relocInfo::runtime_call_type);
2315 __ delayed()->nop();
2316 %}
2317 
// Conditional branch on the 32-bit integer condition codes (icc). Backward
// branches are predicted taken, forward ones not taken; the delay slot is
// filled with a NOP.
2318 enc_class enc_bp( Label labl, cmpOp cmp, flagsReg cc ) %{
2319 MacroAssembler _masm(&cbuf);
2320 Label &L = *($labl$$label);
2321 Assembler::Predict predict_taken =
2322 cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn;
2323 
2324 __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, L);
2325 __ delayed()->nop();
2326 %}
2327 
// Same as enc_bp but branches on the 64-bit condition codes (xcc), used for
// long compares.
2328 enc_class enc_bpl( Label labl, cmpOp cmp, flagsRegL cc ) %{
2329 MacroAssembler _masm(&cbuf);
2330 Label &L = *($labl$$label);
2331 Assembler::Predict predict_taken =
2332 cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn;
2333 
2334 __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, L);
2335 __ delayed()->nop();
2336 %}
2337 
// Same as enc_bp but tests ptr_cc, the condition-code set appropriate for
// pointer-width compares (flagsRegP operand).
2338 enc_class enc_bpx( Label labl, cmpOp cmp, flagsRegP cc ) %{
2339 MacroAssembler _masm(&cbuf);
2340 Label &L = *($labl$$label);
2341 Assembler::Predict predict_taken =
2342 cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn;
2343 
2344 __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, L);
2345 __ delayed()->nop();
2346 %}
2347 
// Floating-point conditional branch (FBPfcc); the fcc field to test comes
// from the flags-register operand ($cc$$reg).
2348 enc_class enc_fbp( Label labl, cmpOpF cmp, flagsRegF cc ) %{
2349 MacroAssembler _masm(&cbuf);
2350 Label &L = *($labl$$label);
2351 Assembler::Predict predict_taken =
2352 cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn;
2353 
2354 __ fbp( (Assembler::Condition)($cmp$$cmpcode), false, (Assembler::CC)($cc$$reg), predict_taken, L);
2355 __ delayed()->nop();
2356 %}
2357 
// Unconditional branch-always with a NOP in the delay slot.
2358 enc_class enc_ba( Label labl ) %{
2359 MacroAssembler _masm(&cbuf);
2360 Label &L = *($labl$$label);
2361 __ ba(false, L);
2362 __ delayed()->nop();
2363 %}
2364 
// Branch on the value of a register (BPr) rather than on condition codes;
// the register condition comes from the cmpOp_reg operand.
2365 enc_class enc_bpr( Label labl, cmpOp_reg cmp, iRegI op1 ) %{
2366 MacroAssembler _masm(&cbuf);
2367 Label &L = *$labl$$label;
2368 Assembler::Predict predict_taken =
2369 cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn;
2370 
2371 __ bpr( (Assembler::RCondition)($cmp$$cmpcode), false, predict_taken, as_Register($op1$$reg), L);
2372 __ delayed()->nop();
2373 %}
2374 
// Hand-assembled MOVcc (conditional move, register form): the 32-bit
// instruction word is built field by field and emitted directly.
2375 enc_class enc_cmov_reg( cmpOp cmp, iRegI dst, iRegI src, immI pcc) %{
2376 int op = (Assembler::arith_op << 30) |
2377 ($dst$$reg << 25) |
2378 (Assembler::movcc_op3 << 19) |
2379 (1 << 18) | // cc2 bit for 'icc'
2380 ($cmp$$cmpcode << 14) |
2381 (0 << 13) | // select register move
2382 ($pcc$$constant << 11) | // cc1, cc0 bits for 'icc' or 'xcc'
2383 ($src$$reg << 0);
2384 cbuf.insts()->emit_int32(op);
2385 %}
2386
2387 enc_class enc_cmov_imm( cmpOp cmp, iRegI dst, immI11 src, immI pcc ) %{
2388 int simm11 = $src$$constant & ((1<<11)-1); // Mask to 11 bits
2389 int op = (Assembler::arith_op << 30) |
2390 ($dst$$reg << 25) |
2391 (Assembler::movcc_op3 << 19) |
2392 (1 << 18) | // cc2 bit for 'icc'
2393 ($cmp$$cmpcode << 14) |
2394 (1 << 13) | // select immediate move
2969 enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result) %{
2970 Label Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone;
2971 MacroAssembler _masm(&cbuf);
2972
2973 Register str1_reg = reg_to_register_object($str1$$reg);
2974 Register str2_reg = reg_to_register_object($str2$$reg);
2975 Register cnt_reg = reg_to_register_object($cnt$$reg);
2976 Register tmp1_reg = O7;
2977 Register result_reg = reg_to_register_object($result$$reg);
2978
2979 assert(result_reg != str1_reg &&
2980 result_reg != str2_reg &&
2981 result_reg != cnt_reg &&
2982 result_reg != tmp1_reg ,
2983 "need different registers");
2984
2985 __ cmp(str1_reg, str2_reg); //same char[] ?
2986 __ brx(Assembler::equal, true, Assembler::pn, Ldone);
2987 __ delayed()->add(G0, 1, result_reg);
2988
2989 __ br_on_reg_cond(Assembler::rc_z, true, Assembler::pn, cnt_reg, Ldone);
2990 __ delayed()->add(G0, 1, result_reg); // count == 0
2991
2992 //rename registers
2993 Register limit_reg = cnt_reg;
2994 Register chr1_reg = result_reg;
2995 Register chr2_reg = tmp1_reg;
2996
2997 //check for alignment and position the pointers to the ends
2998 __ or3(str1_reg, str2_reg, chr1_reg);
2999 __ andcc(chr1_reg, 0x3, chr1_reg);
3000 // notZero means at least one not 4-byte aligned.
3001 // We could optimize the case when both arrays are not aligned
3002 // but it is not frequent case and it requires additional checks.
3003 __ br(Assembler::notZero, false, Assembler::pn, Lchar); // char by char compare
3004 __ delayed()->sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg); // set byte count
3005
3006 // Compare char[] arrays aligned to 4 bytes.
3007 __ char_arrays_equals(str1_reg, str2_reg, limit_reg, result_reg,
3008 chr1_reg, chr2_reg, Ldone);
3009 __ ba(false,Ldone);
3010 __ delayed()->add(G0, 1, result_reg);
3011
3012 // char by char compare
3013 __ bind(Lchar);
3014 __ add(str1_reg, limit_reg, str1_reg);
3015 __ add(str2_reg, limit_reg, str2_reg);
3016 __ neg(limit_reg); //negate count
3017
3018 __ lduh(str1_reg, limit_reg, chr1_reg);
3019 // Lchar_loop
3020 __ bind(Lchar_loop);
3021 __ lduh(str2_reg, limit_reg, chr2_reg);
3022 __ cmp(chr1_reg, chr2_reg);
3023 __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
3024 __ delayed()->mov(G0, result_reg); //not equal
3025 __ inccc(limit_reg, sizeof(jchar));
3026 // annul LDUH if branch is not taken to prevent access past end of string
3027 __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop);
3028 __ delayed()->lduh(str1_reg, limit_reg, chr1_reg); // hoisted
3029
3048 // return true if the same array
3049 __ cmp(ary1_reg, ary2_reg);
3050 __ brx(Assembler::equal, true, Assembler::pn, Ldone);
3051 __ delayed()->add(G0, 1, result_reg); // equal
3052
3053 __ br_null(ary1_reg, true, Assembler::pn, Ldone);
3054 __ delayed()->mov(G0, result_reg); // not equal
3055
3056 __ br_null(ary2_reg, true, Assembler::pn, Ldone);
3057 __ delayed()->mov(G0, result_reg); // not equal
3058
3059 //load the lengths of arrays
3060 __ ld(Address(ary1_reg, length_offset), tmp1_reg);
3061 __ ld(Address(ary2_reg, length_offset), tmp2_reg);
3062
3063 // return false if the two arrays are not equal length
3064 __ cmp(tmp1_reg, tmp2_reg);
3065 __ br(Assembler::notEqual, true, Assembler::pn, Ldone);
3066 __ delayed()->mov(G0, result_reg); // not equal
3067
3068 __ br_on_reg_cond(Assembler::rc_z, true, Assembler::pn, tmp1_reg, Ldone);
3069 __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal
3070
3071 // load array addresses
3072 __ add(ary1_reg, base_offset, ary1_reg);
3073 __ add(ary2_reg, base_offset, ary2_reg);
3074
3075 // renaming registers
3076 Register chr1_reg = result_reg; // for characters in ary1
3077 Register chr2_reg = tmp2_reg; // for characters in ary2
3078 Register limit_reg = tmp1_reg; // length
3079
3080 // set byte count
3081 __ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg);
3082
3083 // Compare char[] arrays aligned to 4 bytes.
3084 __ char_arrays_equals(ary1_reg, ary2_reg, limit_reg, result_reg,
3085 chr1_reg, chr2_reg, Ldone);
3086 __ add(G0, 1, result_reg); // equals
3087
3088 __ bind(Ldone);
9215 __ add($constanttablebase, con_offset, table_reg);
9216 }
9217
9218 // Jump to base address + switch value
9219 __ ld_ptr(table_reg, $switch_val$$Register, label_reg);
9220 __ jmp(label_reg, G0);
9221 __ delayed()->nop();
9222 %}
9223 ins_pc_relative(1);
9224 ins_pipe(ialu_reg_reg);
9225 %}
9226
9227 // Direct Branch. Use V8 version with longer range.
// Unconditional direct branch (Goto): BA plus a delay-slot NOP, 8 bytes total
// (matches size(8); see enc_ba).
9228 instruct branch(label labl) %{
9229 match(Goto);
9230 effect(USE labl);
9231 
9232 size(8);
9233 ins_cost(BRANCH_COST);
9234 format %{ "BA $labl" %}
9235 // Prim = bits 24-22, Secnd = bits 31-30, Tert = cond
9236 opcode(Assembler::br_op2, Assembler::branch_op, Assembler::always);
9237 ins_encode( enc_ba( labl ) );
9238 ins_pc_relative(1);
9239 ins_pipe(br);
9240 %}
9241 
9242 // Conditional Direct Branch
// Conditional branch on icc; prediction is chosen by enc_bp from the branch
// direction (backward = taken). BP + delay-slot NOP = 8 bytes.
9243 instruct branchCon(cmpOp cmp, flagsReg icc, label labl) %{
9244 match(If cmp icc);
9245 effect(USE labl);
9246 
9247 size(8);
9248 ins_cost(BRANCH_COST);
9249 format %{ "BP$cmp $icc,$labl" %}
9250 // Prim = bits 24-22, Secnd = bits 31-30
9251 ins_encode( enc_bp( labl, cmp, icc ) );
9252 ins_pc_relative(1);
9253 ins_pipe(br_cc);
9254 %}
9255
9256 // Branch-on-register tests all 64 bits. We assume that values
9257 // in 64-bit registers always remains zero or sign extended
9297 %}
9298
9299 instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{
9300 match(If cmp icc);
9301 effect(USE labl);
9302 
// NOTE(review): unlike the sibling branch instructs (e.g. branchCon), no
// size(8)/ins_cost(BRANCH_COST) is declared here even though enc_bp emits the
// same BP + delay-slot NOP pair — confirm the omission is intentional.
9303 format %{ "BP$cmp $icc,$labl" %}
9304 // Prim = bits 24-22, Secnd = bits 31-30
9305 ins_encode( enc_bp( labl, cmp, icc ) );
9306 ins_pc_relative(1);
9307 ins_pipe(br_cc);
9308 %}
9309
// Conditional branch on pointer condition codes (enc_bpx uses ptr_cc).
9310 instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{
9311 match(If cmp pcc);
9312 effect(USE labl);
9313 
9314 size(8);
9315 ins_cost(BRANCH_COST);
9316 format %{ "BP$cmp $pcc,$labl" %}
9317 // Prim = bits 24-22, Secnd = bits 31-30
9318 ins_encode( enc_bpx( labl, cmp, pcc ) );
9319 ins_pc_relative(1);
9320 ins_pipe(br_cc);
9321 %}
9322 
// Conditional branch on floating-point condition codes (enc_fbp / FBPfcc).
9323 instruct branchConF(cmpOpF cmp, flagsRegF fcc, label labl) %{
9324 match(If cmp fcc);
9325 effect(USE labl);
9326 
9327 size(8);
9328 ins_cost(BRANCH_COST);
9329 format %{ "FBP$cmp $fcc,$labl" %}
9330 // Prim = bits 24-22, Secnd = bits 31-30
9331 ins_encode( enc_fbp( labl, cmp, fcc ) );
9332 ins_pc_relative(1);
9333 ins_pipe(br_fcc);
9334 %}
9335 
// Branch closing a counted loop; same icc encoding as branchCon, matched
// against CountedLoopEnd instead of If.
9336 instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{
9337 match(CountedLoopEnd cmp icc);
9338 effect(USE labl);
9339 
9340 size(8);
9341 ins_cost(BRANCH_COST);
9342 format %{ "BP$cmp $icc,$labl\t! Loop end" %}
9343 // Prim = bits 24-22, Secnd = bits 31-30
9344 ins_encode( enc_bp( labl, cmp, icc ) );
9345 ins_pc_relative(1);
9346 ins_pipe(br_cc);
9347 %}
9348
9349 instruct branchLoopEndU(cmpOpU cmp, flagsRegU icc, label labl) %{
9350 match(CountedLoopEnd cmp icc);
9351 effect(USE labl);
9370 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
9371 // NE test is negated from that.
9372
9373 // Due to a shortcoming in the ADLC, it mixes up expressions like:
9374 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
9375 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
9376 // are collapsed internally in the ADLC's dfa-gen code. The match for
9377 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
9378 // foo match ends up with the wrong leaf. One fix is to not match both
9379 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
9380 // both forms beat the trinary form of long-compare and both are very useful
9381 // on Intel which has so few registers.
9382
// Conditional branch on xcc for 64-bit (long) compares (enc_bpl).
9383 instruct branchCon_long(cmpOp cmp, flagsRegL xcc, label labl) %{
9384 match(If cmp xcc);
9385 effect(USE labl);
9386 
9387 size(8);
9388 ins_cost(BRANCH_COST);
9389 format %{ "BP$cmp $xcc,$labl" %}
9390 // Prim = bits 24-22, Secnd = bits 31-30
9391 ins_encode( enc_bpl( labl, cmp, xcc ) );
9392 ins_pc_relative(1);
9393 ins_pipe(br_cc);
9394 %}
9395
9396 // Manifest a CmpL3 result in an integer register. Very painful.
9397 // This is the test to avoid.
9398 instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{
9399 match(Set dst (CmpL3 src1 src2) );
9400 effect( KILL ccr );
9401 ins_cost(6*DEFAULT_COST);
9402 size(24);
9403 format %{ "CMP $src1,$src2\t\t! long\n"
9404 "\tBLT,a,pn done\n"
9405 "\tMOV -1,$dst\t! delay slot\n"
9406 "\tBGT,a,pn done\n"
9407 "\tMOV 1,$dst\t! delay slot\n"
9408 "\tCLR $dst\n"
9409 "done:" %}
9410 ins_encode( cmpl_flag(src1,src2,dst) );
9411 ins_pipe(cmpL_reg);
9690
// Partial subtype check compared against zero: calls the stub via
// enc_PartialSubtypeCheck, which sets the condition codes (see format).
// The fixed O0 (idx) and O7 registers are clobbered by the call.
9691 instruct partialSubtypeCheck_vs_zero( flagsRegP pcc, o1RegP sub, o2RegP super, immP0 zero, o0RegP idx, o7RegP o7 ) %{
9692 match(Set pcc (CmpP (PartialSubtypeCheck sub super) zero));
9693 effect( KILL idx, KILL o7 );
9694 ins_cost(DEFAULT_COST*10);
9695 format %{ "CALL PartialSubtypeCheck\n\tNOP\t# (sets condition codes)" %}
9696 ins_encode( enc_PartialSubtypeCheck() );
9697 ins_pipe(partial_subtype_check_pipe);
9698 %}
9699
9700
9701 // ============================================================================
9702 // inlined locking and unlocking
9703
// Fast-path monitor enter: sets pcc for the following branch; scratch is
// killed and scratch2 is a temp (see effects).
9704 instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
9705 match(Set pcc (FastLock object box));
9706 
9707 effect(KILL scratch, TEMP scratch2);
9708 ins_cost(100);
9709 
9710 size(4*112); // conservative overestimation ...
9711 format %{ "FASTLOCK $object, $box; KILL $scratch, $scratch2, $box" %}
9712 ins_encode( Fast_Lock(object, box, scratch, scratch2) );
9713 ins_pipe(long_memory_op);
9714 %}
9715 
9716 
// Fast-path monitor exit; mirror of cmpFastLock using the Fast_Unlock encoding.
9717 instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
9718 match(Set pcc (FastUnlock object box));
9719 effect(KILL scratch, TEMP scratch2);
9720 ins_cost(100);
9721 
9722 size(4*120); // conservative overestimation ...
9723 format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2, $box" %}
9724 ins_encode( Fast_Unlock(object, box, scratch, scratch2) );
9725 ins_pipe(long_memory_op);
9726 %}
9727 
9728 // Count and Base registers are fixed because the allocator cannot
9729 // kill unknown registers. The encodings are generic.
// Zeroes the array a dword (8 bytes) at a time: temp counts down by 8 with
// the STX of G0 in the branch delay slot (see format). Clobbers ccr via SUBcc.
9730 instruct clear_array(iRegX cnt, iRegP base, iRegX temp, Universe dummy, flagsReg ccr) %{
9731 match(Set dummy (ClearArray cnt base));
9732 effect(TEMP temp, KILL ccr);
9733 ins_cost(300);
9734 format %{ "MOV $cnt,$temp\n"
9735 "loop: SUBcc $temp,8,$temp\t! Count down a dword of bytes\n"
9736 " BRge loop\t\t! Clearing loop\n"
9737 " STX G0,[$base+$temp]\t! delay slot" %}
9738 ins_encode( enc_Clear_Array(cnt, base, temp) );
9739 ins_pipe(long_memory_op);
9740 %}
9741
9742 instruct string_compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result,
|
1676 assert(Universe::heap() != NULL, "java heap should be initialized");
1677 st->print_cr("\tLDUW [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass");
1678 st->print_cr("\tSLL R_G5,3,R_G5");
1679 if (Universe::narrow_oop_base() != NULL)
1680 st->print_cr("\tADD R_G5,R_G6_heap_base,R_G5");
1681 } else {
1682 st->print_cr("\tLDX [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
1683 }
1684 st->print_cr("\tCMP R_G5,R_G3" );
1685 st->print ("\tTne xcc,R_G0+ST_RESERVED_FOR_USER_0+2");
1686 #else // _LP64
1687 st->print_cr("\tLDUW [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
1688 st->print_cr("\tCMP R_G5,R_G3" );
1689 st->print ("\tTne icc,R_G0+ST_RESERVED_FOR_USER_0+2");
1690 #endif // _LP64
1691 }
1692 #endif
1693
1694 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
// Emits the unverified entry point (UEP): load the receiver's klass, compare
// it with the expected klass held in the inline-cache register, and trap on a
// mismatch (the trap vectors to the IC-miss handler).
1695 MacroAssembler _masm(&cbuf);
1696 Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
1697 Register temp_reg = G3; // scratch register for the receiver's klass
1698 assert( G5_ic_reg != temp_reg, "conflicting registers" );
1699 
1700 // Load klass from receiver
1701 __ load_klass(O0, temp_reg);
1702 // Compare against expected klass
1703 __ cmp(temp_reg, G5_ic_reg);
1704 // Branch to miss code, checks xcc or icc depending
1705 __ trap(Assembler::notEqual, Assembler::ptr_cc, G0, ST_RESERVED_FOR_USER_0+2);
1706 }
1707 
1708 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
// The UEP has no fixed size here; defer to the generic computed-size path.
1709 return MachNode::size(ra_);
1710 }
1711
1712
1713 //=============================================================================
1714
1715 uint size_exception_handler() {
2297 // CMP $src1,$src2
2298 emit3( cbuf, Assembler::arith_op, 0, Assembler::subcc_op3, $src1$$reg, 0, $src2$$reg );
2299 // blt,a,pn done
2300 emit2_19( cbuf, Assembler::branch_op, 1/*annul*/, Assembler::less , Assembler::bp_op2, Assembler::xcc, 0/*predict not taken*/, 5 );
2301 // mov dst,-1 in delay slot
2302 emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0, -1 );
2303 // bgt,a,pn done
2304 emit2_19( cbuf, Assembler::branch_op, 1/*annul*/, Assembler::greater, Assembler::bp_op2, Assembler::xcc, 0/*predict not taken*/, 3 );
2305 // mov dst,1 in delay slot
2306 emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0, 1 );
2307 // CLR $dst
2308 emit3( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3 , 0, 0, 0 );
2309 %}
2310
// Calls the partial-subtype-check stub via a runtime-call relocation; the
// stub's outcome is reflected in the condition codes consumed by the matching
// instruct (see partialSubtypeCheck_vs_zero).
2311 enc_class enc_PartialSubtypeCheck() %{
2312 MacroAssembler _masm(&cbuf);
2313 __ call(StubRoutines::Sparc::partial_subtype_check(), relocInfo::runtime_call_type);
2314 __ delayed()->nop();
2315 %}
2316 
// Conditional branch on the 32-bit integer condition codes (icc). The label
// operand is accessed through a Label pointer; backward branches are
// predicted taken, forward ones not taken; the delay slot is a NOP.
2317 enc_class enc_bp( label labl, cmpOp cmp, flagsReg cc ) %{
2318 MacroAssembler _masm(&cbuf);
2319 Label* L = $labl$$label;
2320 Assembler::Predict predict_taken =
2321 cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
2322 
2323 __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
2324 __ delayed()->nop();
2325 %}
2326 
// Branch on the value of a register (BPr) rather than on condition codes;
// the register condition comes from the cmpOp_reg operand.
2327 enc_class enc_bpr( label labl, cmpOp_reg cmp, iRegI op1 ) %{
2328 MacroAssembler _masm(&cbuf);
2329 Label* L = $labl$$label;
2330 Assembler::Predict predict_taken =
2331 cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
2332 
2333 __ bpr( (Assembler::RCondition)($cmp$$cmpcode), false, predict_taken, as_Register($op1$$reg), *L);
2334 __ delayed()->nop();
2335 %}
2336 
// Hand-assembled MOVcc (conditional move, register form): the 32-bit
// instruction word is built field by field and emitted directly.
2337 enc_class enc_cmov_reg( cmpOp cmp, iRegI dst, iRegI src, immI pcc) %{
2338 int op = (Assembler::arith_op << 30) |
2339 ($dst$$reg << 25) |
2340 (Assembler::movcc_op3 << 19) |
2341 (1 << 18) | // cc2 bit for 'icc'
2342 ($cmp$$cmpcode << 14) |
2343 (0 << 13) | // select register move
2344 ($pcc$$constant << 11) | // cc1, cc0 bits for 'icc' or 'xcc'
2345 ($src$$reg << 0);
2346 cbuf.insts()->emit_int32(op);
2347 %}
2348
2349 enc_class enc_cmov_imm( cmpOp cmp, iRegI dst, immI11 src, immI pcc ) %{
2350 int simm11 = $src$$constant & ((1<<11)-1); // Mask to 11 bits
2351 int op = (Assembler::arith_op << 30) |
2352 ($dst$$reg << 25) |
2353 (Assembler::movcc_op3 << 19) |
2354 (1 << 18) | // cc2 bit for 'icc'
2355 ($cmp$$cmpcode << 14) |
2356 (1 << 13) | // select immediate move
2931 enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result) %{
2932 Label Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone;
2933 MacroAssembler _masm(&cbuf);
2934
2935 Register str1_reg = reg_to_register_object($str1$$reg);
2936 Register str2_reg = reg_to_register_object($str2$$reg);
2937 Register cnt_reg = reg_to_register_object($cnt$$reg);
2938 Register tmp1_reg = O7;
2939 Register result_reg = reg_to_register_object($result$$reg);
2940
2941 assert(result_reg != str1_reg &&
2942 result_reg != str2_reg &&
2943 result_reg != cnt_reg &&
2944 result_reg != tmp1_reg ,
2945 "need different registers");
2946
2947 __ cmp(str1_reg, str2_reg); //same char[] ?
2948 __ brx(Assembler::equal, true, Assembler::pn, Ldone);
2949 __ delayed()->add(G0, 1, result_reg);
2950
2951 __ bpr(Assembler::rc_z, true, Assembler::pn, cnt_reg, Ldone);
2952 __ delayed()->add(G0, 1, result_reg); // count == 0
2953
2954 //rename registers
2955 Register limit_reg = cnt_reg;
2956 Register chr1_reg = result_reg;
2957 Register chr2_reg = tmp1_reg;
2958
2959 //check for alignment and position the pointers to the ends
2960 __ or3(str1_reg, str2_reg, chr1_reg);
2961 __ andcc(chr1_reg, 0x3, chr1_reg);
2962 // notZero means at least one not 4-byte aligned.
2963 // We could optimize the case when both arrays are not aligned
2964 // but it is not frequent case and it requires additional checks.
2965 __ br(Assembler::notZero, false, Assembler::pn, Lchar); // char by char compare
2966 __ delayed()->sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg); // set byte count
2967
2968 // Compare char[] arrays aligned to 4 bytes.
2969 __ char_arrays_equals(str1_reg, str2_reg, limit_reg, result_reg,
2970 chr1_reg, chr2_reg, Ldone);
2971 __ ba(Ldone);
2972 __ delayed()->add(G0, 1, result_reg);
2973
2974 // char by char compare
2975 __ bind(Lchar);
2976 __ add(str1_reg, limit_reg, str1_reg);
2977 __ add(str2_reg, limit_reg, str2_reg);
2978 __ neg(limit_reg); //negate count
2979
2980 __ lduh(str1_reg, limit_reg, chr1_reg);
2981 // Lchar_loop
2982 __ bind(Lchar_loop);
2983 __ lduh(str2_reg, limit_reg, chr2_reg);
2984 __ cmp(chr1_reg, chr2_reg);
2985 __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
2986 __ delayed()->mov(G0, result_reg); //not equal
2987 __ inccc(limit_reg, sizeof(jchar));
2988 // annul LDUH if branch is not taken to prevent access past end of string
2989 __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop);
2990 __ delayed()->lduh(str1_reg, limit_reg, chr1_reg); // hoisted
2991
3010 // return true if the same array
3011 __ cmp(ary1_reg, ary2_reg);
3012 __ brx(Assembler::equal, true, Assembler::pn, Ldone);
3013 __ delayed()->add(G0, 1, result_reg); // equal
3014
3015 __ br_null(ary1_reg, true, Assembler::pn, Ldone);
3016 __ delayed()->mov(G0, result_reg); // not equal
3017
3018 __ br_null(ary2_reg, true, Assembler::pn, Ldone);
3019 __ delayed()->mov(G0, result_reg); // not equal
3020
3021 //load the lengths of arrays
3022 __ ld(Address(ary1_reg, length_offset), tmp1_reg);
3023 __ ld(Address(ary2_reg, length_offset), tmp2_reg);
3024
3025 // return false if the two arrays are not equal length
3026 __ cmp(tmp1_reg, tmp2_reg);
3027 __ br(Assembler::notEqual, true, Assembler::pn, Ldone);
3028 __ delayed()->mov(G0, result_reg); // not equal
3029
3030 __ bpr(Assembler::rc_z, true, Assembler::pn, tmp1_reg, Ldone);
3031 __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal
3032
3033 // load array addresses
3034 __ add(ary1_reg, base_offset, ary1_reg);
3035 __ add(ary2_reg, base_offset, ary2_reg);
3036
3037 // renaming registers
3038 Register chr1_reg = result_reg; // for characters in ary1
3039 Register chr2_reg = tmp2_reg; // for characters in ary2
3040 Register limit_reg = tmp1_reg; // length
3041
3042 // set byte count
3043 __ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg);
3044
3045 // Compare char[] arrays aligned to 4 bytes.
3046 __ char_arrays_equals(ary1_reg, ary2_reg, limit_reg, result_reg,
3047 chr1_reg, chr2_reg, Ldone);
3048 __ add(G0, 1, result_reg); // equals
3049
3050 __ bind(Ldone);
9177 __ add($constanttablebase, con_offset, table_reg);
9178 }
9179
9180 // Jump to base address + switch value
9181 __ ld_ptr(table_reg, $switch_val$$Register, label_reg);
9182 __ jmp(label_reg, G0);
9183 __ delayed()->nop();
9184 %}
9185 ins_pc_relative(1);
9186 ins_pipe(ialu_reg_reg);
9187 %}
9188
9189 // Direct Branch. Use V8 version with longer range.
// Unconditional direct branch (Goto): BA plus a delay-slot NOP, 8 bytes
// total (matches size(8)); encoded inline.
9190 instruct branch(label labl) %{
9191 match(Goto);
9192 effect(USE labl);
9193 
9194 size(8);
9195 ins_cost(BRANCH_COST);
9196 format %{ "BA $labl" %}
9197 ins_encode %{
9198 Label* L = $labl$$label;
9199 __ ba(*L);
9200 __ delayed()->nop();
9201 %}
9202 ins_pc_relative(1);
9203 ins_pipe(br);
9204 %}
9205 
9206 // Conditional Direct Branch
// Conditional branch on icc; prediction is chosen by enc_bp from the branch
// direction (backward = taken). BP + delay-slot NOP = 8 bytes.
9207 instruct branchCon(cmpOp cmp, flagsReg icc, label labl) %{
9208 match(If cmp icc);
9209 effect(USE labl);
9210 
9211 size(8);
9212 ins_cost(BRANCH_COST);
9213 format %{ "BP$cmp $icc,$labl" %}
9214 // Prim = bits 24-22, Secnd = bits 31-30
9215 ins_encode( enc_bp( labl, cmp, icc ) );
9216 ins_pc_relative(1);
9217 ins_pipe(br_cc);
9218 %}
9219
9220 // Branch-on-register tests all 64 bits. We assume that values
9221 // in 64-bit registers always remains zero or sign extended
9261 %}
9262
9263 instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{
9264 match(If cmp icc);
9265 effect(USE labl);
9266 
// NOTE(review): unlike the sibling branch instructs (e.g. branchCon), no
// size(8)/ins_cost(BRANCH_COST) is declared here even though enc_bp emits the
// same BP + delay-slot NOP pair — confirm the omission is intentional.
9267 format %{ "BP$cmp $icc,$labl" %}
9268 // Prim = bits 24-22, Secnd = bits 31-30
9269 ins_encode( enc_bp( labl, cmp, icc ) );
9270 ins_pc_relative(1);
9271 ins_pipe(br_cc);
9272 %}
9273
// Conditional branch on pointer condition codes (ptr_cc); encoded inline
// with direction-based prediction.
9274 instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{
9275 match(If cmp pcc);
9276 effect(USE labl);
9277 
9278 size(8);
9279 ins_cost(BRANCH_COST);
9280 format %{ "BP$cmp $pcc,$labl" %}
9281 ins_encode %{
9282 Label* L = $labl$$label;
9283 Assembler::Predict predict_taken =
9284 cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
9285 
9286 __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
9287 __ delayed()->nop();
9288 %}
9289 ins_pc_relative(1);
9290 ins_pipe(br_cc);
9291 %}
9292 
// Conditional branch on floating-point condition codes (FBPfcc); the fcc
// field to test comes from the flags-register operand ($fcc$$reg).
9293 instruct branchConF(cmpOpF cmp, flagsRegF fcc, label labl) %{
9294 match(If cmp fcc);
9295 effect(USE labl);
9296 
9297 size(8);
9298 ins_cost(BRANCH_COST);
9299 format %{ "FBP$cmp $fcc,$labl" %}
9300 ins_encode %{
9301 Label* L = $labl$$label;
9302 Assembler::Predict predict_taken =
9303 cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
9304 
9305 __ fbp( (Assembler::Condition)($cmp$$cmpcode), false, (Assembler::CC)($fcc$$reg), predict_taken, *L);
9306 __ delayed()->nop();
9307 %}
9308 ins_pc_relative(1);
9309 ins_pipe(br_fcc);
9310 %}
9311 
// Branch closing a counted loop; same icc encoding as branchCon, matched
// against CountedLoopEnd instead of If.
9312 instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{
9313 match(CountedLoopEnd cmp icc);
9314 effect(USE labl);
9315 
9316 size(8);
9317 ins_cost(BRANCH_COST);
9318 format %{ "BP$cmp $icc,$labl\t! Loop end" %}
9319 // Prim = bits 24-22, Secnd = bits 31-30
9320 ins_encode( enc_bp( labl, cmp, icc ) );
9321 ins_pc_relative(1);
9322 ins_pipe(br_cc);
9323 %}
9324
9325 instruct branchLoopEndU(cmpOpU cmp, flagsRegU icc, label labl) %{
9326 match(CountedLoopEnd cmp icc);
9327 effect(USE labl);
9346 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the
9347 // NE test is negated from that.
9348
9349 // Due to a shortcoming in the ADLC, it mixes up expressions like:
9350 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
9351 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections
9352 // are collapsed internally in the ADLC's dfa-gen code. The match for
9353 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
9354 // foo match ends up with the wrong leaf. One fix is to not match both
9355 // reg-reg and reg-zero forms of long-compare. This is unfortunate because
9356 // both forms beat the trinary form of long-compare and both are very useful
9357 // on Intel which has so few registers.
9358
// Conditional branch on xcc for 64-bit (long) compares; encoded inline with
// direction-based prediction.
9359 instruct branchCon_long(cmpOp cmp, flagsRegL xcc, label labl) %{
9360 match(If cmp xcc);
9361 effect(USE labl);
9362 
9363 size(8);
9364 ins_cost(BRANCH_COST);
9365 format %{ "BP$cmp $xcc,$labl" %}
9366 ins_encode %{
9367 Label* L = $labl$$label;
9368 Assembler::Predict predict_taken =
9369 cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
9370 
9371 __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
9372 __ delayed()->nop();
9373 %}
9374 ins_pc_relative(1);
9375 ins_pipe(br_cc);
9376 %}
9377
9378 // Manifest a CmpL3 result in an integer register. Very painful.
9379 // This is the test to avoid.
9380 instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{
9381 match(Set dst (CmpL3 src1 src2) );
9382 effect( KILL ccr );
9383 ins_cost(6*DEFAULT_COST);
9384 size(24);
9385 format %{ "CMP $src1,$src2\t\t! long\n"
9386 "\tBLT,a,pn done\n"
9387 "\tMOV -1,$dst\t! delay slot\n"
9388 "\tBGT,a,pn done\n"
9389 "\tMOV 1,$dst\t! delay slot\n"
9390 "\tCLR $dst\n"
9391 "done:" %}
9392 ins_encode( cmpl_flag(src1,src2,dst) );
9393 ins_pipe(cmpL_reg);
9672
// Partial subtype check compared against zero: calls the stub via
// enc_PartialSubtypeCheck, which sets the condition codes (see format).
// The fixed O0 (idx) and O7 registers are clobbered by the call.
9673 instruct partialSubtypeCheck_vs_zero( flagsRegP pcc, o1RegP sub, o2RegP super, immP0 zero, o0RegP idx, o7RegP o7 ) %{
9674 match(Set pcc (CmpP (PartialSubtypeCheck sub super) zero));
9675 effect( KILL idx, KILL o7 );
9676 ins_cost(DEFAULT_COST*10);
9677 format %{ "CALL PartialSubtypeCheck\n\tNOP\t# (sets condition codes)" %}
9678 ins_encode( enc_PartialSubtypeCheck() );
9679 ins_pipe(partial_subtype_check_pipe);
9680 %}
9681
9682
9683 // ============================================================================
9684 // inlined locking and unlocking
9685
// Fast-path monitor enter: sets pcc for the following branch; scratch is
// killed and scratch2 is a temp (see effects).
// NOTE(review): no fixed size() is declared (an earlier revision used a
// conservative size(4*112)) — presumably the size is computed from the
// emitted code; confirm.
9686 instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
9687 match(Set pcc (FastLock object box));
9688 
9689 effect(KILL scratch, TEMP scratch2);
9690 ins_cost(100);
9691 
9692 format %{ "FASTLOCK $object, $box; KILL $scratch, $scratch2, $box" %}
9693 ins_encode( Fast_Lock(object, box, scratch, scratch2) );
9694 ins_pipe(long_memory_op);
9695 %}
9696 
9697 
// Fast-path monitor exit; mirror of cmpFastLock using the Fast_Unlock encoding.
9698 instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
9699 match(Set pcc (FastUnlock object box));
9700 effect(KILL scratch, TEMP scratch2);
9701 ins_cost(100);
9702 
9703 format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2, $box" %}
9704 ins_encode( Fast_Unlock(object, box, scratch, scratch2) );
9705 ins_pipe(long_memory_op);
9706 %}
9707 
9708 // Count and Base registers are fixed because the allocator cannot
9709 // kill unknown registers. The encodings are generic.
// Zeroes the array a dword (8 bytes) at a time: temp counts down by 8 with
// the STX of G0 in the branch delay slot (see format). Clobbers ccr via SUBcc.
9710 instruct clear_array(iRegX cnt, iRegP base, iRegX temp, Universe dummy, flagsReg ccr) %{
9711 match(Set dummy (ClearArray cnt base));
9712 effect(TEMP temp, KILL ccr);
9713 ins_cost(300);
9714 format %{ "MOV $cnt,$temp\n"
9715 "loop: SUBcc $temp,8,$temp\t! Count down a dword of bytes\n"
9716 " BRge loop\t\t! Clearing loop\n"
9717 " STX G0,[$base+$temp]\t! delay slot" %}
9718 ins_encode( enc_Clear_Array(cnt, base, temp) );
9719 ins_pipe(long_memory_op);
9720 %}
9721
9722 instruct string_compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result,
|