
src/cpu/sparc/vm/macroAssembler_sparc.cpp

4649   // Shift ary1 and ary2 to the end of the arrays, negate limit
4650   add(ary1, limit, ary1);
4651   add(ary2, limit, ary2);
4652   neg(limit, limit);
4653 
4654   lduw(ary1, limit, result);
4655   bind(Lloop);
4656   lduw(ary2, limit, tmp);
4657   cmp(result, tmp);
4658   br(Assembler::notEqual, true, Assembler::pt, Ldone);
4659   delayed()->mov(G0, result);     // not equal
4660   inccc(limit, 2*sizeof(jchar));
4661   // annul LDUW if branch is not taken to prevent access past end of array
4662   br(Assembler::notZero, true, Assembler::pt, Lloop);
4663   delayed()->lduw(ary1, limit, result); // hoisted
4664 
4665   add(G0, 1, result); // equals
4666   bind(Ldone);
4667 }
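The loop above compares two jchar arrays four bytes (two chars) at a time by pointing each base register just past the data, negating limit, and letting inccc count the negative offset back up to zero; the hoisted lduw in the annulled delay slot fetches the next word only when the loop branch is taken. A minimal C++ sketch of that indexing scheme, with a hypothetical name, without the software-pipelined load, and assuming limit is a multiple of four bytes:

// Sketch only -- not HotSpot code. Mirrors the negated-limit indexing above.
#include <cstddef>
#include <cstdint>
#include <cstring>

static bool char_arrays_equal_sketch(const uint16_t* ary1, const uint16_t* ary2,
                                     size_t limit_bytes) {
  const char* end1 = reinterpret_cast<const char*>(ary1) + limit_bytes;  // add(ary1, limit, ary1)
  const char* end2 = reinterpret_cast<const char*>(ary2) + limit_bytes;  // add(ary2, limit, ary2)
  ptrdiff_t off = -static_cast<ptrdiff_t>(limit_bytes);                  // neg(limit, limit)
  while (off != 0) {                                                     // inccc / br(notZero)
    uint32_t a, b;                                                       // one lduw loads two jchars
    std::memcpy(&a, end1 + off, sizeof(a));
    std::memcpy(&b, end2 + off, sizeof(b));
    if (a != b) return false;                                            // cmp / br(notEqual)
    off += 4;                                                            // 2 * sizeof(jchar)
  }
  return true;
}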
4668 
4669 void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5, Register t6) {
4670 
4671   // Test for negative bytes in an input string of a given size.
4672   // Result is 1 if found, 0 otherwise.
4673 
4674   Label Lcore, Ltail, Lreturn, Lcore_rpt;
4675 
4676   assert_different_registers(inp, size, t2, t3, t4, t5, t6, result);
4677 
4678   Register i     = result;  // result used as integer index i until very end
4679   Register lmask = t2;      // t2 is aliased to lmask
4680 
4681   // INITIALIZATION
4682   // ===========================================================
4683   // initialize highbits mask -> lmask = 0x8080808080808080  (8B/64b)
4684   // compute unaligned offset -> i
4685   // compute core end index   -> t6
4686   Assembler::sethi(0x80808000, t3);   //! the sethi macro fails to emit the optimal code here
4687   add(t3, 0x80, t2);
4688   sllx(t2, 32, t5);
4689   or3(t5, t2, lmask);                 // 0x8080808080808080 -> lmask
4690   andcc(inp, 0x7, i);                 // unaligned offset -> i
4691   br(Assembler::zero, true, Assembler::pn, Lcore); // starts 8B aligned?
4692   delayed()->add(size, -8, t6);       // (annulled) core end index -> t6
4693 
4694   // ===========================================================
4695 
4696   // UNALIGNED HEAD
4697   // ===========================================================
4698   // * unaligned head handling: grab aligned 8B containing unaligned inp(ut)
4699   // * obliterate (ignore) bytes outside string by shifting off reg ends
4700   // * compare with bitmask, short circuit return true if one or more high
4701   //   bits set.
4702   neg(i, t3);
4703   add(i, size, t5);
4704   sra(t3, 0, t4);
4705   ldx(inp, t4, t3);  // raw aligned 8B containing unaligned head -> t3
4706   mov(8, t4);
4707   sub(t4, t5, t4);
4708   sra(t4, 31, t6);
4709   andn(t4, t6, t5);
4710   add(i, t5, t4);
4711   sll(t5, 3, t5);
4712   sll(t4, 3, t6);   // # bits to shift right, left -> t5,t6
4713   srlx(t3, t5, t4);
4714   sllx(t4, t6, t3); // bytes outside string in 8B header obliterated -> t3
4715   andcc(lmask, t3, G0);
4716   brx(Assembler::notZero, true, Assembler::pn, Lreturn); // short circuit?
4717   delayed()->mov(1, result);     // annulled so i is not clobbered for the following code
4718   add(size, -8, t6);             // core end index -> t6
4719   mov(8, t4);
4720   sub(t4, i, i);                 // # bytes examined in unaligned head (<8) -> i
4721   // ===========================================================
4722 
4723   // ALIGNED CORE
4724   // ===========================================================
4725   // * iterate index i over aligned 8B sections of core, comparing with
4726   //   bitmask, short circuit return true if one or more high bits set
4727   // t6 contains core end index/loop limit which is the index
4728   //     of the MSB of last (unaligned) 8B fully contained in the string.
4729   // inp   contains address of first byte in string/array
4730   // lmask contains 8B high bit mask for comparison
4731   // i     contains next index to be processed (adr. inp+i is on 8B boundary)
4732   bind(Lcore);
4733   cmp_and_br_short(i, t6, Assembler::greater, Assembler::pn, Ltail);
4734   bind(Lcore_rpt);
4735   ldx(i, inp, t3);
4736   andcc(t3, lmask, G0);
4737   brx(Assembler::notZero, true, Assembler::pn, Lreturn);
4738   delayed()->mov(1, result);
4739   add(i, 8, i);
4740   cmp_and_br_short(i, t6, Assembler::lessEqual, Assembler::pn, Lcore_rpt);
4741   // ===========================================================
4742 
4743   // ALIGNED TAIL (<8B)
4744   // ===========================================================
4745   // handle aligned tail of 7B or less as complete 8B, obliterating end of
4746   // string bytes by shifting them off end, compare what's left with bitmask
4747   // inp   contains address of first byte in string/array
4748   // lmask contains 8B high bit mask for comparison
4749   // i     contains next index to be processed (adr. inp+i is on 8B boundary)
4750   bind(Ltail);
4751   subcc(size, i, t4);   // # of remaining bytes in string -> t4
4752   // return 0 if no more remaining bytes
4753   br(Assembler::lessEqual, true, Assembler::pn, Lreturn);
4754   delayed()->mov(0, result); // annulled: so i is not clobbered for the following load
4755   ldx(inp, i, t6);       // load final 8B (aligned) containing tail -> t6
4756   mov(8, t5);
4757   sub(t5, t4, t3);
4758   mov(0, result);        // ** i clobbered at this point
4759   sll(t3, 3, t5);        // bits beyond end of string          -> t5
4760   srlx(t6, t5, t3);      // bytes beyond end now obliterated   -> t3
4761   andcc(lmask, t3, G0);
4762   movcc(Assembler::notZero, false, xcc,  1, result);
4763   bind(Lreturn);
4764 }
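The routine is built around the high-bit-mask trick: a signed byte is negative exactly when its top bit is set, so AND-ing an aligned 8-byte word against 0x8080808080808080 tests eight bytes at once, and the unaligned head and short tail are widened to full words with the out-of-string bytes shifted away before the mask test. A portable C++ sketch of the same idea, with a hypothetical name and a plain byte loop standing in for the head/tail shift tricks:

// Sketch only -- not HotSpot code. Same 8-bytes-at-a-time high-bit test.
#include <cstddef>
#include <cstdint>
#include <cstring>

static bool has_negatives_sketch(const int8_t* inp, size_t size) {
  const uint64_t lmask = 0x8080808080808080ULL;   // high bit of every byte
  size_t i = 0;
  // Unaligned head: plain byte test (the assembly instead loads the enclosing
  // aligned 8B word and shifts the out-of-string bytes off both ends).
  while (i < size && (reinterpret_cast<uintptr_t>(inp + i) & 7) != 0) {
    if (inp[i] < 0) return true;
    i++;
  }
  // Aligned core: one 8B mask test per iteration.
  for (; i + 8 <= size; i += 8) {
    uint64_t w;
    std::memcpy(&w, inp + i, sizeof(w));
    if (w & lmask) return true;
  }
  // Tail of fewer than 8 bytes.
  for (; i < size; i++) {
    if (inp[i] < 0) return true;
  }
  return false;
}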
4765 
4766 #endif
4767 
4768 
4769 // Use BIS for zeroing (count is in bytes).
4770 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
4771   assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
4772   Register end = count;
4773   int cache_line_size = VM_Version::prefetch_data_size();
4774   // Minimum count for which BIS zeroing is used, since
4775   // it needs a membar, which is expensive.
4776   int block_zero_size  = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit);
4777 
4778   Label small_loop;
4779   // Check if count is negative (dead code) or zero.
4780   // Note: count uses 64 bits in a 64-bit VM.
4781   cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone);
4782 
4783   // Use BIS zeroing only for big arrays since it requires a membar.
4784   if (Assembler::is_simm13(block_zero_size)) { // < 4096
4785     cmp(count, block_zero_size);
4786   } else {
4787     set(block_zero_size, temp);
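The Assembler::is_simm13 test above is purely about instruction encoding: SPARC compare/arithmetic instructions take a 13-bit signed immediate, so a block_zero_size below 4096 can be compared against directly, while a larger value must first be materialized in temp with set(). A stand-alone sketch of that range check (hypothetical helper, assuming the usual simm13 range of -4096 .. 4095):

// Sketch of a 13-bit signed-immediate range check (hypothetical helper).
#include <cstdint>

static inline bool fits_simm13(int64_t x) {
  return x >= -4096 && x <= 4095;   // -2^12 .. 2^12 - 1
}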

