src/cpu/sparc/vm/macroAssembler_sparc.cpp

Print this page
rev 10891 : tweak array comparison loops


4534   delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
4535 
4536   // If strings are equal up to min length, return the length difference.
4537   if (ae == StrIntrinsicNode::UU) {
4538     // Divide by 2 to get number of chars
4539     sra(diff, 1, result);
4540   } else {
4541     mov(diff, result);
4542   }
4543 
4544   // Otherwise, return the difference between the first mismatched chars.
4545   bind(Ldone);
4546   if(ae == StrIntrinsicNode::UL) {
4547     // Negate result (see note above)
4548     neg(result);
4549   }
4550 }
4551 
// Emits code that compares the contents addressed by 'ary1' and 'ary2' and
// leaves 1 in 'result' if they are equal, 0 otherwise.
//
//  is_array_equ - true: 'ary1'/'ary2' are array references; identity, null and
//                 length checks are emitted and 'limit' is loaded from the array
//                 length field. false: 'ary1'/'ary2' are used as addresses as-is
//                 and 'limit' is supplied by the caller (presumably a byte count;
//                 verify against the callers).
//  is_byte      - selects T_BYTE vs. T_CHAR for the base offset and whether the
//                 length is scaled by sizeof(jchar) to get a byte count.
//
// 'ary1', 'ary2', 'limit' and 'tmp' are overwritten by the emitted code.
4552 void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
4553                                   Register limit, Register tmp, Register result, bool is_byte) {
4554   Label Ldone, Lword;
4555   assert_different_registers(ary1, ary2, limit, tmp, result);

4556 
4557   int length_offset  = arrayOopDesc::length_offset_in_bytes();
4558   int base_offset    = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);

4559 
4560   if (is_array_equ) {
4561     // return true if the same array
4562     cmp(ary1, ary2);
4563     brx(Assembler::equal, true, Assembler::pn, Ldone);
4564     delayed()->mov(1, result);  // equal
4565 
4566     br_null(ary1, true, Assembler::pn, Ldone);
4567     delayed()->clr(result);     // not equal
4568 
4569     br_null(ary2, true, Assembler::pn, Ldone);
4570     delayed()->clr(result);     // not equal
4571 
4572     // load the lengths of arrays
4573     ld(Address(ary1, length_offset), limit);
4574     ld(Address(ary2, length_offset), tmp);
4575 
4576     // return false if the two arrays are not equal length
4577     cmp(limit, tmp);
4578     br(Assembler::notEqual, true, Assembler::pn, Ldone);
4579     delayed()->clr(result);     // not equal
4580   }
4581 
       // From here on 'limit' must be non-zero: the comparison loop below
       // unconditionally loads the first chunk.
4582   cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
4583   delayed()->mov(1, result); // zero-length arrays are equal
4584 
4585   if (is_array_equ) {
4586     // load array addresses
4587     add(ary1, base_offset, ary1);
4588     add(ary2, base_offset, ary2);
4589     // set byte count
4590     if (!is_byte) {
4591       sll(limit, exact_log2(sizeof(jchar)), limit);
4592     }
4593   } else {
4594     // We have no guarantee that on 64 bit the higher half of limit is 0
4595     signx(limit);
4596   }
4597 


4598   // Check for doubleword (8 byte) alignment of ary1 and ary2
       // tmp = (ary1 | ary2) & 7 is zero only if both addresses are 8-byte aligned.
4599   or3(ary1, ary2, tmp);
4600   andcc(tmp, 7, tmp);



4601   br_notnull_short(tmp, Assembler::pn, Lword);
4602 
4603   // Aligned, perform doubleword comparison
       // The loop either branches to Ldone with 'result' already set, or falls
       // out with 'result' cleared and the condition codes holding the outcome of
       // the tail comparison; the movcc in the delay slot turns that into 1/0.
4604   array_equals_loop(ary1, ary2, limit, tmp, result, 8, Ldone);
4605   ba(Ldone);
4606   delayed()->movcc(Assembler::equal, false, xcc, 1, result);
4607 
4608   bind(Lword);
4609   // Unaligned, perform word comparison (word alignment is guaranteed)
4610   array_equals_loop(ary1, ary2, limit, tmp, result, 4, Ldone);
       // 4-byte chunks are compared on icc rather than xcc.
4611   movcc(Assembler::equal, false, icc, 1, result);





4612 
4613   bind(Ldone);
4614 }
4615 
4616 // Compares two arrays in chunks of size 'byte_width'. The addresses must be aligned accordingly.
//
// 'limit' holds the number of bytes to compare on entry and must be non-zero,
// because the first chunk is loaded unconditionally.
//
// Exits of the emitted code:
//  - mismatch in the main loop: branches to 'Ldone' with 'result' == 0;
//  - last chunk equal including any over-read bytes: branches to 'Ldone' with
//    'result' == 1;
//  - otherwise: falls out of the end with 'result' == 0 and the condition codes
//    set by the tail comparison, so the caller must finish with a conditional
//    move on 'equal'.
//
// 'ary1', 'ary2', 'limit' and 'tmp' are overwritten.
4617 void MacroAssembler::array_equals_loop(Register ary1, Register ary2, Register limit, Register tmp,
4618                                        Register result, size_t byte_width, Label& Ldone) {
4619   Label Lloop, Lremaining;







4620   // Use appropriate CC register depending on byte_width
4621   Assembler::CC cc = (byte_width == 8) ? xcc : icc;
4622 









































4623   // Shift ary1 and ary2 to the end of the arrays, negate limit
       // 'limit' then serves as a negative index that counts up towards zero, so a
       // single inccc both advances the position and detects the end.
4624   add(ary1, limit, ary1);
4625   add(ary2, limit, ary2);
4626   neg(limit, limit);






















































4627 
4628   // MAIN LOOP
4629   // Load and compare array elements of size 'byte_width' until the elements are not
4630   // equal or we reached the end of the arrays. If the size of the arrays is not a
4631   // multiple of 'byte_width', we simply read over the end of the array, bail out and
4632   // compare the remaining bytes below by skipping the garbage bytes.
4633   load_sized_value(Address(ary1, limit), result, byte_width, false);
4634   bind(Lloop);
4635   load_sized_value(Address(ary2, limit), tmp, byte_width, false);
4636   inccc(limit, byte_width);
4637   // Bail out if we reached the end (but still do the comparison)
4638   br(Assembler::positive, false, Assembler::pn, Lremaining);
4639   delayed()->cmp(result, tmp);
4640   // Check equality of elements
       // The delay slot already loads the next ary1 chunk for the following iteration.
4641   bp(Assembler::equal, false, cc, Assembler::pt, target(Lloop));
4642   delayed()->load_sized_value(Address(ary1, limit), result, byte_width, false);
4643 
4644   ba(Ldone);
4645   delayed()->clr(result); // not equal
4646 
4647   // TAIL COMPARISON
4648   // We got here because we reached the end of the arrays. 'limit' is the number of
4649   // garbage bytes we may have compared by reading over the end of the arrays. Shift
4650   // out the garbage and compare the remaining elements.



4651   bind(Lremaining);
4652   // Optimistic shortcut: elements potentially including garbage are equal
4653   bp(Assembler::equal, true, cc, Assembler::pt, target(Ldone));
4654   delayed()->mov(1, result); // equal
4655   // Shift 'limit' bytes to the right and compare
4656   sll(limit, 3, limit); // bytes to bits
4657   srlx(result, limit, result);
4658   srlx(tmp, limit, tmp);
4659   cmp(result, tmp);
       // 'result' is cleared here; the final 0/1 value is left to the caller.
4660   clr(result);
4661   // CC register contains result










4662 }
4663 
4664 void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
4665 
4666   // test for negative bytes in input string of a given size
4667   // result 1 if found, 0 otherwise.
4668 
4669   Label Lcore, Ltail, Lreturn, Lcore_rpt;
4670 
4671   assert_different_registers(inp, size, t2, t3, t4, t5, result);
4672 
4673   Register i     = result;  // result used as integer index i until very end
4674   Register lmask = t2;      // t2 is aliased to lmask
4675 
4676   // INITIALIZATION
4677   // ===========================================================
4678   // initialize highbits mask -> lmask = 0x8080808080808080  (8B/64b)
4679   // compute unaligned offset -> i
4680   // compute core end index   -> t5
4681   Assembler::sethi(0x80808000, t2);   //! sethi macro fails to emit optimal




4534   delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
4535 
4536   // If strings are equal up to min length, return the length difference.
4537   if (ae == StrIntrinsicNode::UU) {
4538     // Divide by 2 to get number of chars
4539     sra(diff, 1, result);
4540   } else {
4541     mov(diff, result);
4542   }
4543 
4544   // Otherwise, return the difference between the first mismatched chars.
4545   bind(Ldone);
4546   if(ae == StrIntrinsicNode::UL) {
4547     // Negate result (see note above)
4548     neg(result);
4549   }
4550 }
4551 
// Emits code that compares the contents addressed by 'ary1' and 'ary2' and
// leaves 1 in 'result' if they are equal, 0 otherwise.
//
//  is_array_equ - true: 'ary1'/'ary2' are array references; identity, null and
//                 length checks are emitted and 'limit' is loaded from the array
//                 length field. false: 'ary1'/'ary2' are used as addresses as-is
//                 and 'limit' is supplied by the caller (presumably a byte count;
//                 verify against the callers).
//  is_byte      - selects T_BYTE vs. T_CHAR for the base offset and whether the
//                 length is scaled by sizeof(jchar) to get a byte count.
//
// One of three comparison loops is selected at run time:
//  - both addresses 8-byte aligned and more than 'short_length' bytes: doubleword loop;
//  - 'short_length' bytes or fewer: word loop;
//  - otherwise (long, but not both 8-byte aligned): misaligned doubleword loop.
//
// 'ary1', 'ary2', 'limit' and 'tmp' are overwritten by the emitted code, and
// 'result' doubles as a scratch register until the final value is produced.
4552 void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
4553                                   Register limit, Register tmp, Register result, bool is_byte) {
4554   Label Ldone, Lword, Lmisaligned;
4555   assert_different_registers(ary1, ary2, limit, tmp, result);
4556   Register tmp2 = result; // may be used as a temp also
4557 
4558   int length_offset  = arrayOopDesc::length_offset_in_bytes();
4559   int base_offset    = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
4560   const int short_length = 16;  // at this size or smaller we don't bother to use 64-bit fetches
4561 
4562   if (is_array_equ) {
4563     // return true if the same array
4564     cmp(ary1, ary2);
4565     brx(Assembler::equal, true, Assembler::pn, Ldone);
4566     delayed()->mov(1, result);  // equal
4567 
4568     br_null(ary1, true, Assembler::pn, Ldone);
4569     delayed()->clr(result);     // not equal
4570 
4571     br_null(ary2, true, Assembler::pn, Ldone);
4572     delayed()->clr(result);     // not equal
4573 
4574     // load the lengths of arrays
4575     ld(Address(ary1, length_offset), limit);
4576     ld(Address(ary2, length_offset), tmp);
4577 
4578     // return false if the two arrays are not equal length
4579     cmp(limit, tmp);
4580     br(Assembler::notEqual, true, Assembler::pn, Ldone);
4581     delayed()->clr(result);     // not equal
4582   }
4583 
       // From here on 'limit' must be non-zero: the comparison loops below
       // unconditionally load the first chunk.
4584   cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
4585   delayed()->mov(1, result); // zero-length arrays are equal
4586 
4587   if (is_array_equ) {
4588     // load array addresses
4589     add(ary1, base_offset, ary1);
4590     add(ary2, base_offset, ary2);
4591     // set byte count
4592     if (!is_byte) {
4593       sll(limit, exact_log2(sizeof(jchar)), limit);
4594     }
4595   } else {
4596     // We have no guarantee that on 64 bit the higher half of limit is 0
4597     signx(limit);
4598   }
4599 
       // Build a three-way selector in tmp:
       //   tmp == 0 : long and both addresses 8-byte aligned
       //   tmp <  0 : short (the all-ones mask from tmp2 wins over the alignment bits)
       //   tmp >  0 : long but not 8-byte aligned
4600   // Check for a short length (16 or less).
4601   sub(limit, short_length+1, tmp2);
4602   // Check for doubleword (8 byte) alignment of ary1 and ary2
4603   or3(ary1, ary2, tmp);
4604   srax(tmp2, 63, tmp2);  // = (limit<=16) ? -1 : 0
       // 'and' and 'or' are reserved operator spellings in C++, so the
       // three-operand assembler forms and3/or3 must be used here.
4605   and3(tmp, 7, tmp);     // = (ary1%8 | ary2%8)
4606   or3(tmp, tmp2, tmp);
4607 
4608   br_notnull_short(tmp, Assembler::pn, Lword);
4609 
4610   // Aligned, perform doubleword comparison
       // With a non-NULL done label the loop always leaves via a branch to Ldone,
       // so control never runs on into the code bound at Lword.
4611   array_equals_loop(ary1, ary2, limit, tmp, result, 8, &Ldone);


4612 
4613   bind(Lword);
4614   cmp_and_brx_short(tmp, 0, Assembler::greater, Assembler::pn, Lmisaligned);
4615   
4616   // Short count, perform word comparison (word alignment is guaranteed)
4617   array_equals_loop(ary1, ary2, limit, tmp, result, 4, &Ldone);
4618 
4619   bind(Lmisaligned);
4620   // Unaligned doubleword comparison (word alignment is guaranteed)
       // 8+4 selects the misaligned mode of the loop; NULL lets it fall through
       // to Ldone, which is bound immediately below.
4621   array_equals_loop(ary1, ary2, limit, tmp, result, 8+4, NULL);
4622 
4623   bind(Ldone);
4624 }
4625 
4626 // Compares two arrays in chunks of size 'byte_width'. The addresses must be aligned accordingly.
//
//  byte_width    - 4 or 8 for the plain word / doubleword loops. The special value
//                  8+4 selects the misaligned mode: 8-byte chunks are compared
//                  although the addresses are only known to be 4-byte aligned.
//                  In that mode O7 is used as an additional temp.
//  Ldone_or_null - label to branch to on every exit, or NULL to make the emitted
//                  code fall through at its end instead.
//
// 'limit' holds the number of bytes to compare on entry and must be non-zero; in
// misaligned mode the caller must also ensure that limit-4 is positive.
// On exit 'result' is 1 if the arrays are equal and 0 otherwise. 'ary1', 'ary2',
// 'limit' and 'tmp' are overwritten.
4627 void MacroAssembler::array_equals_loop(Register ary1, Register ary2, Register limit, Register tmp,
4628                                        Register result, size_t byte_width, Label* Ldone_or_null) {
4629   bool misaligned = (byte_width == 8+4);
4630   if (misaligned)  byte_width = 8;
4631   assert(byte_width == 4 || byte_width == 8, "");
4632 
4633   Register word1 = misaligned ? O7 : noreg;
4634   assert_different_registers(ary1, ary2, limit, tmp, result, word1);
4635 
4636   Label Lloop, Lremaining, Lfallthrough;
4637   // Use appropriate CC register depending on byte_width
4638   Assembler::CC cc = (byte_width == 8) ? xcc : icc;
4639 
       // Without a caller-supplied label, "done" is simply the end of this code.
4640   Label& Ldone = *((Ldone_or_null != NULL) ? Ldone_or_null : &Lfallthrough);
4641 
4642   if (misaligned) {
4643     // Test for co-alignment.
         // Laligned marks the hand-over to the ordinary aligned loop below. It is
         // deliberately a separate label from the function-level Lfallthrough,
         // which is bound at the very end.
4644     Label Lswap, Lskewed, Lskloop, Lofframp, Laligned;
4645     btst(4, ary2);
4646     brx(Assembler::zero, false, Assembler::pn, Lskewed);
4647     delayed()->load_sized_value(Address(ary1, 0), word1, byte_width/2, false);
4648     // if ary2 is even, then assume ary1 is odd and start the loop right away
4649 
4650     // ary2 is odd, so what about ary1?
4651     btst(4, ary1);
4652     brx(Assembler::zero, false, Assembler::pn, Lswap);
4653     delayed()->load_sized_value(Address(ary2, 0), result, byte_width/2, false);
4654 
4655     // Both are odd.  Compare a common first word and go aligned.
4656     cmp(result, word1);
4657     // Check equality of elements
4658     bp(Assembler::notEqual, false, cc, Assembler::pn, Ldone);
4659     delayed()->clr(result); // not equal
4660 
4661     add(ary1, 4, ary1);
4662     add(ary2, 4, ary2);
4663     br(Assembler::always, false, Assembler::pt, Laligned);
4664     delayed()->sub(limit, 4, limit);
4665     // Finish the loop in 64-bit chunks.
4666     // (Caller is responsible to ensure that limit-4 is positive.)
4667 
4668     bind(Lswap);
4669     mov(result, word1);  // grab loaded half-word into correct register
4670     // ary1 is even and ary2 is odd, so swap them to match the Lskewed invariant
4671     mov(ary1, tmp);
4672     mov(ary2, ary1);
4673     mov(tmp, ary2);
4674     // and fall through to skewed loop
4675 
4676     bind(Lskewed);
4677     // - ary1 is 4 (mod 8)
4678     // - ary2 is 0 (mod 8)
4679     // - word1 (low-order 32 bits) is ((int*)ary1)[0]
4680 
4681     // Shift ary1 and ary2 to the end of the arrays, negate limit
4682     add(ary1, limit, ary1);
4683     add(ary2, limit, ary2);
4684     neg(limit, limit);
4685     // Align ary1 by pushing it ahead of word1:
4686     add(ary1, byte_width/2, ary1);
4687 
4688     bind(Lskloop);
4689     // SKEWED MAIN LOOP
4690     // Load and compare skewed array elements of size 8 until the elements are not
4691     // equal or we reached the end of the arrays.  Loop cleanup (in the case of
4692     // a remainder of 1..7 bytes) is handled in common with the aligned loop.
4693     sllx(word1, 32, result);  // put word1 payload into MSW position
4694     // we already have word1; now fetch word2 and word3 (in one 64-bit chunk)
4695     { Register word23 = word1;  // reuse temp locally
4696       load_sized_value(Address(ary1, limit), word23, byte_width, false);
4697       srlx(word23, 32, tmp);  // put word2 payload into LSW position
4698       or3(result, tmp, result);  // materialize *(unaligned long)(ary1+limit-4) = [word1|word2]
4699       // and, the LSW of word1 (= word23) now contains word3, so we are good
4700     }
4701     load_sized_value(Address(ary2, limit), tmp, byte_width, false);
4702     // To avoid accidents, ease out of this loop when we have 12 or fewer bytes left.
         // byte_width is unsigned, so convert to int before negating.
4703     cmp_and_br_short(limit, -((int)byte_width*3/2), Assembler::greaterEqual, Assembler::pn, Lofframp);
4704     cmp(result, tmp);
4705     // Check equality of elements
4706     bp(Assembler::equal, false, cc, Assembler::pt, target(Lskloop));
4707     delayed()->inc(limit, byte_width);
4708 
4709     ba(Ldone);
4710     delayed()->clr(result); // not equal
4711 
4712     bind(Lofframp);
4713     // limit is in the range [-12..-4], and there are 4..12 bytes left 
4714     inccc(limit, byte_width);
4715     // Bail out immediately if there are 4..8 bytes left.
4716     br(Assembler::positive, false, Assembler::pn, Lremaining);
4717     delayed()->xorcc(tmp, result, tmp);
4718 
4719     // There are 9..12 bytes left, so first handle the final 64-bit chunk
4720     bp(Assembler::notEqual, true, cc, Assembler::pt, Ldone);
4721     delayed()->clr(result); // not equal
4722 
4723     // There are 1..4 bytes left now.
4724     sllx(word1, 32, result);  // put word1 payload into MSW position
4725     load_sized_value(Address(ary2, limit), tmp, byte_width, false);
4726     // No need to increment limit by 8; only the bottom 3 bits are significant.
4727     ba(Lremaining);
4728     delayed()->xorcc(tmp, result, tmp);
4729 
4730     bind(Laligned);
4731     // fall through to normal case
4732   }
4733 
4734   // Shift ary1 and ary2 to the end of the arrays, negate limit
4735   // (Caller is responsible to ensure that limit starts out non-zero.)
4736   add(ary1, limit, ary1);
4737   add(ary2, limit, ary2);
4738   neg(limit, limit);
4739 
4740   // MAIN LOOP
4741   // Load and compare array elements of size 'byte_width' until the elements are not
4742   // equal or we reached the end of the arrays. If the size of the arrays is not a
4743   // multiple of 'byte_width', we simply read over the end of the array, bail out and
4744   // compare the remaining bytes below by skipping the garbage bytes.
4745   load_sized_value(Address(ary1, limit), result, byte_width, false);
4746   bind(Lloop);
4747   load_sized_value(Address(ary2, limit), tmp, byte_width, false);
4748   inccc(limit, byte_width);
4749   // Bail out if we reached the end (but still do the comparison)
       // xorcc leaves the bitwise difference in tmp for the tail code and sets
       // the condition codes for the equality branch below.
4750   br(Assembler::positive, false, Assembler::pn, Lremaining);
4751   delayed()->xorcc(tmp, result, tmp);
4752   // Check equality of elements
4753   bp(Assembler::equal, false, cc, Assembler::pt, target(Lloop));
4754   delayed()->load_sized_value(Address(ary1, limit), result, byte_width, false);
4755 
4756   ba(Ldone);
4757   delayed()->clr(result); // not equal
4758 
4759   // TAIL COMPARISON
4760   // We got here because we reached the end of the arrays. 'limit' is the number of
4761   // garbage bytes we may have compared by reading over the end of the arrays. Shift
4762   // out the garbage and compare the remaining elements.
4763   // The elements are pre-compared bitwise, in that result has been xored into tmp.
4764   // Also, in the following code, only the low 2-3 bits of 'limit' are significant.
4765   // A 'limit' value of either 0 or byte_width means "preserve all bits".
4766   bind(Lremaining);
4767   // Optimistic shortcut: elements potentially including garbage are equal
4768   bp(Assembler::equal, true, cc, Assembler::pt, target(Ldone));
4769   delayed()->mov(1, result); // equal
4770   // Shift 'limit' bytes to the right and compare
4771   sll(limit, 3, limit); // bytes to bits



4772   clr(result);
4773   if (cc == icc) {
4774     srl(tmp, limit, tmp);
4775   } else {
4776     srlx(tmp, limit, tmp);
4777   }
       // With an external done label, the final conditional move is emitted into
       // the delay slot of the branch; otherwise it is the last instruction
       // before falling through.
4778   if (&Ldone != &Lfallthrough) {
4779     ba(Ldone);
4780     delayed(); // ->movr(...)
4781   }
       // NOTE(review): confirm the operand order against the movr declaration
       // in the assembler header (the condition may have to be the first argument).
4782   movr(tmp, Assembler::rc_z, 1, result);  // may be a delay slot instruction
4783   bind(Lfallthrough);
4784 }
4785 
4786 void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
4787 
4788   // test for negative bytes in input string of a given size
4789   // result 1 if found, 0 otherwise.
4790 
4791   Label Lcore, Ltail, Lreturn, Lcore_rpt;
4792 
4793   assert_different_registers(inp, size, t2, t3, t4, t5, result);
4794 
4795   Register i     = result;  // result used as integer index i until very end
4796   Register lmask = t2;      // t2 is aliased to lmask
4797 
4798   // INITIALIZATION
4799   // ===========================================================
4800   // initialize highbits mask -> lmask = 0x8080808080808080  (8B/64b)
4801   // compute unaligned offset -> i
4802   // compute core end index   -> t5
4803   Assembler::sethi(0x80808000, t2);   //! sethi macro fails to emit optimal