< prev index next >

src/cpu/sparc/vm/macroAssembler_sparc.cpp

Print this page




4649   // Shift ary1 and ary2 to the end of the arrays, negate limit
4650   add(ary1, limit, ary1);
4651   add(ary2, limit, ary2);
4652   neg(limit, limit);
4653 
4654   lduw(ary1, limit, result);
4655   bind(Lloop);
4656   lduw(ary2, limit, tmp);
4657   cmp(result, tmp);
4658   br(Assembler::notEqual, true, Assembler::pt, Ldone);
4659   delayed()->mov(G0, result);     // not equal
4660   inccc(limit, 2*sizeof(jchar));
4661   // annul LDUW if branch is not taken to prevent access past end of array
4662   br(Assembler::notZero, true, Assembler::pt, Lloop);
4663   delayed()->lduw(ary1, limit, result); // hoisted
4664 
4665   add(G0, 1, result); // equals
4666   bind(Ldone);
4667 }
4668 





































































































4669 #endif

4670 
4671 // Use BIS for zeroing (count is in bytes).
4672 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
4673   assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
4674   Register end = count;
4675   int cache_line_size = VM_Version::prefetch_data_size();
4676   // Minimum count when BIS zeroing can be used since
4677   // it needs membar which is expensive.
4678   int block_zero_size  = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit);
4679 
4680   Label small_loop;
4681   // Check if count is negative (dead code) or zero.
4682   // Note, count uses 64bit in 64 bit VM.
4683   cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone);
4684 
4685   // Use BIS zeroing only for big arrays since it requires membar.
4686   if (Assembler::is_simm13(block_zero_size)) { // < 4096
4687     cmp(count, block_zero_size);
4688   } else {
4689     set(block_zero_size, temp);




4649   // Shift ary1 and ary2 to the end of the arrays, negate limit
4650   add(ary1, limit, ary1);
4651   add(ary2, limit, ary2);
4652   neg(limit, limit);
4653 
4654   lduw(ary1, limit, result);
4655   bind(Lloop);
4656   lduw(ary2, limit, tmp);
4657   cmp(result, tmp);
4658   br(Assembler::notEqual, true, Assembler::pt, Ldone);
4659   delayed()->mov(G0, result);     // not equal
4660   inccc(limit, 2*sizeof(jchar));
4661   // annul LDUW if branch is not taken to prevent access past end of array
4662   br(Assembler::notZero, true, Assembler::pt, Lloop);
4663   delayed()->lduw(ary1, limit, result); // hoisted
4664 
4665   add(G0, 1, result); // equals
4666   bind(Ldone);
4667 }
4668 
4669 void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
4670 
4671   // test for negative bytes in input string of a given size
4672   // result 1 if found, 0 otherwise.
     //
     // inp    - address of first byte of the string/array (need not be 8B aligned)
     // size   - number of bytes to examine; only the low 32 bits are used
     //          (sign-extended by the sra(size,0,size) below)
     // result - out: 1 if any examined byte has its high (sign) bit set, 0 otherwise
     // t2-t5  - scratch registers, clobbered
     //
     // Strategy: test 8 bytes at a time against the high-bit mask
     // 0x8080808080808080.  An unaligned head and a <8B tail are handled by
     // loading the enclosing aligned 8B word and shifting off the bytes that
     // lie outside the string.  Several branches below are annulled delayed
     // control transfers: the delay-slot instruction executes only when the
     // branch is taken, which is what keeps i/result (aliased) from being
     // clobbered on the fall-through path.
4673 
4674   Label Lcore, Ltail, Lreturn, Lcore_rpt;
4675 
4676   assert_different_registers(inp, size, t2, t3, t4, t5, result);
4677 
4678   Register i     = result;  // result used as integer index i until very end
4679   Register lmask = t2;      // t2 is aliased to lmask
4680 
4681   // INITIALIZATION
4682   // ===========================================================
4683   // initialize highbits mask -> lmask = 0x8080808080808080  (8B/64b)
4684   // compute unaligned offset -> i
4685   // compute core end index   -> t5
4686   Assembler::sethi(0x80808000, t2);   //! sethi macro fails to emit optimal
4687   add(t2, 0x80, t2);
4688   sllx(t2, 32, t3);
4689   or3(t3, t2, lmask);                 // 0x8080808080808080 -> lmask
4690   sra(size,0,size);                   // use only the low 32 bits of size, sign-extended
4691   andcc(inp, 0x7, i);                 // unaligned offset -> i
4692   br(Assembler::zero, true, Assembler::pn, Lcore); // starts 8B aligned?
4693   delayed()->add(size, -8, t5);       // (annuled) core end index -> t5
4694 
4695   // ===========================================================
4696 
4697   // UNALIGNED HEAD
4698   // ===========================================================
4699   // * unaligned head handling: grab aligned 8B containing unaligned inp(ut)
4700   // * obliterate (ignore) bytes outside string by shifting off reg ends
4701   // * compare with bitmask, short circuit return true if one or more high
4702   //   bits set.
4703   cmp(size, 0);
4704   br(Assembler::zero, true, Assembler::pn, Lreturn); // short-circuit?
4705   delayed()->mov(0,result);      // annuled so i not clobbered for following
4706   neg(i, t4);                    // t4 = -(unaligned offset)
4707   add(i, size, t5);
4708   sra(t4, 0, t4);
4709   ldx(inp, t4, t3);  // raw aligned 8B containing unaligned head -> t3
4710   mov(8, t4);
4711   sub(t4, t5, t4);
4712   sra(t4, 31, t5);               // t5 = all-ones if t4 < 0, else zero (sign spread)
4713   andn(t4, t5, t5);              // t5 = max(t4, 0)
4714   add(i, t5, t4);
4715   sll(t5, 3, t5);
4716   sll(t4, 3, t4);   // # bits to shift right, left -> t5,t4
4717   srlx(t3, t5, t3);
4718   sllx(t3, t4, t3); // bytes outside string in 8B header obliterated -> t3
4719   andcc(lmask, t3, G0);          // any high bit set in the surviving bytes?
4720   brx(Assembler::notZero, true, Assembler::pn, Lreturn); // short circuit?
4721   delayed()->mov(1,result);      // annuled so i not clobbered for following
4722   add(size, -8, t5);             // core end index -> t5
4723   mov(8, t4);
4724   sub(t4, i, i);                 // # bytes examined in unalgn head (<8) -> i
4725   // ===========================================================
4726 
4727   // ALIGNED CORE
4728   // ===========================================================
4729   // * iterate index i over aligned 8B sections of core, comparing with
4730   //   bitmask, short circuit return true if one or more high bits set
4731   // t5 contains core end index/loop limit which is the index
4732   //     of the MSB of last (unaligned) 8B fully contained in the string.
4733   // inp   contains address of first byte in string/array
4734   // lmask contains 8B high bit mask for comparison
4735   // i     contains next index to be processed (adr. inp+i is on 8B boundary)
4736   bind(Lcore);
4737   cmp_and_br_short(i, t5, Assembler::greater, Assembler::pn, Ltail);
4738   bind(Lcore_rpt);
4739   ldx(inp, i, t3);
4740   andcc(t3, lmask, G0);
4741   brx(Assembler::notZero, true, Assembler::pn, Lreturn);
4742   delayed()->mov(1, result);    // annuled so i not clobbered for following
4743   add(i, 8, i);
4744   cmp_and_br_short(i, t5, Assembler::lessEqual, Assembler::pn, Lcore_rpt);
4745   // ===========================================================
4746 
4747   // ALIGNED TAIL (<8B)
4748   // ===========================================================
4749   // handle aligned tail of 7B or less as complete 8B, obliterating end of
4750   // string bytes by shifting them off end, compare what's left with bitmask
4751   // inp   contains address of first byte in string/array
4752   // lmask contains 8B high bit mask for comparison
4753   // i     contains next index to be processed (adr. inp+i is on 8B boundary)
4754   bind(Ltail);
4755   subcc(size, i, t4);   // # of remaining bytes in string -> t4
4756   // return 0 if no more remaining bytes
4757   br(Assembler::lessEqual, true, Assembler::pn, Lreturn);
4758   delayed()->mov(0, result); // annuled so i not clobbered for following
4759   ldx(inp, i, t3);       // load final 8B (aligned) containing tail -> t3
4760   mov(8, t5);
4761   sub(t5, t4, t4);
4762   mov(0, result);        // ** i clobbered at this point
4763   sll(t4, 3, t4);        // bits beyond end of string          -> t4
4764   srlx(t3, t4, t3);      // bytes beyond end now obliterated   -> t3
4765   andcc(lmask, t3, G0);
4766   movcc(Assembler::notZero, false, xcc,  1, result); // result = 1 iff a high bit survived
4767   bind(Lreturn);
4768 }
4769 
4770 #endif
4771 
4772 
4773 // Use BIS for zeroing (count is in bytes).
4774 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
4775   assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
4776   Register end = count;
4777   int cache_line_size = VM_Version::prefetch_data_size();
4778   // Minimum count when BIS zeroing can be used since
4779   // it needs membar which is expensive.
4780   int block_zero_size  = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit);
4781 
4782   Label small_loop;
4783   // Check if count is negative (dead code) or zero.
4784   // Note, count uses 64bit in 64 bit VM.
4785   cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone);
4786 
4787   // Use BIS zeroing only for big arrays since it requires membar.
4788   if (Assembler::is_simm13(block_zero_size)) { // < 4096
4789     cmp(count, block_zero_size);
4790   } else {
4791     set(block_zero_size, temp);


< prev index next >