4534 delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
4535
4536 // If strings are equal up to min length, return the length difference.
4537 if (ae == StrIntrinsicNode::UU) {
4538 // Divide by 2 to get number of chars
4539 sra(diff, 1, result);
4540 } else {
4541 mov(diff, result);
4542 }
4543
4544 // Otherwise, return the difference between the first mismatched chars.
4545 bind(Ldone);
4546 if(ae == StrIntrinsicNode::UL) {
4547 // Negate result (see note above)
4548 neg(result);
4549 }
4550 }
4551
// Emits code that tests two arrays for equality and leaves 1 in 'result' when they
// are equal, 0 when they are not.
//
//   is_array_equ - when true, the emitted code first handles identical references and
//                  null references, loads both array lengths (into 'limit' and 'tmp'),
//                  rejects unequal lengths, and then advances ary1/ary2 by the array
//                  base offset. When false, ary1/ary2 are used as addresses as given
//                  and 'limit' is used as supplied (after sign extension).
//   ary1, ary2   - the operands; both registers are modified.
//   limit        - length register; modified. In the is_array_equ case it is loaded
//                  here and, for non-byte elements, scaled by sizeof(jchar).
//                  NOTE(review): in the !is_array_equ case it is presumably already a
//                  byte count - confirm against the callers.
//   tmp          - scratch register.
//   result       - receives 1 (equal) or 0 (not equal).
//   is_byte      - selects T_BYTE rather than T_CHAR for the base offset and disables
//                  the length scaling.
4552 void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
4553 Register limit, Register tmp, Register result, bool is_byte) {
4554 Label Ldone, Lword;
4555 assert_different_registers(ary1, ary2, limit, tmp, result);
4556
4557 int length_offset = arrayOopDesc::length_offset_in_bytes();
4558 int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
4559
4560 if (is_array_equ) {
// The branches in this section pass 'true' as the annul flag, so each delay-slot
// write to 'result' is meant to take effect only when its branch is taken.
4561 // return true if the same array
4562 cmp(ary1, ary2);
4563 brx(Assembler::equal, true, Assembler::pn, Ldone);
4564 delayed()->mov(1, result); // equal
4565
4566 br_null(ary1, true, Assembler::pn, Ldone);
4567 delayed()->clr(result); // not equal
4568
4569 br_null(ary2, true, Assembler::pn, Ldone);
4570 delayed()->clr(result); // not equal
4571
4572 // load the lengths of arrays
4573 ld(Address(ary1, length_offset), limit);
4574 ld(Address(ary2, length_offset), tmp);
4575
4576 // return false if the two arrays are not equal length
4577 cmp(limit, tmp);
4578 br(Assembler::notEqual, true, Assembler::pn, Ldone);
4579 delayed()->clr(result); // not equal
4580 }
4581
// array_equals_loop is never entered with a zero length.
4582 cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
4583 delayed()->mov(1, result); // zero-length arrays are equal
4584
4585 if (is_array_equ) {
4586 // load array addresses
4587 add(ary1, base_offset, ary1);
4588 add(ary2, base_offset, ary2);
4589 // set byte count
4590 if (!is_byte) {
4591 sll(limit, exact_log2(sizeof(jchar)), limit);
4592 }
4593 } else {
4594 // We have no guarantee that on 64 bit the higher half of limit is 0
4595 signx(limit);
4596 }
4597
4598 // Check for doubleword (8 byte) alignment of ary1 and ary2
// tmp becomes non-zero if either address has any of its low three bits set.
4599 or3(ary1, ary2, tmp);
4600 andcc(tmp, 7, tmp);
4601 br_notnull_short(tmp, Assembler::pn, Lword);
4602
4603 // Aligned, perform doubleword comparison
// On fall-through from the loop 'result' is 0 and the condition codes describe the
// final chunk; the movcc in the delay slot turns "equal" into result = 1.
4604 array_equals_loop(ary1, ary2, limit, tmp, result, 8, Ldone);
4605 ba(Ldone);
4606 delayed()->movcc(Assembler::equal, false, xcc, 1, result);
4607
4608 bind(Lword);
4609 // Unaligned, perform word comparison (word alignment is guaranteed)
// Same protocol as above, but the 4-byte loop reports through icc.
4610 array_equals_loop(ary1, ary2, limit, tmp, result, 4, Ldone);
4611 movcc(Assembler::equal, false, icc, 1, result);
4612
4613 bind(Ldone);
4614 }
4615
4616 // Compares two arrays in chunks of size 'byte_width'. The addresses must be aligned accordingly.
//
//   ary1, ary2 - start addresses on entry; both are advanced by 'limit' so that they
//                point at the end of the data (modified).
//   limit      - byte count on entry; negated and then used as a running negative
//                offset from the end (modified).
//   tmp        - scratch; receives each chunk loaded from ary2.
//   result     - scratch for each chunk loaded from ary1, then the answer on the
//                paths that branch to Ldone.
//   byte_width - chunk size in bytes; 8 selects xcc for the equality tests, any other
//                value selects icc.
//   Ldone      - branch target for the paths on which 'result' is already final
//                (0 after a mismatch in the main loop, 1 when the last chunk matches
//                even with the over-read bytes included).
//
// On fall-through 'result' is 0 and the condition codes hold the comparison of the
// last chunk with the over-read bytes shifted out; the caller must turn that into
// 'result' itself (array_equals does so with movcc).
4617 void MacroAssembler::array_equals_loop(Register ary1, Register ary2, Register limit, Register tmp,
4618 Register result, size_t byte_width, Label& Ldone) {
4619 Label Lloop, Lremaining;
4620 // Use appropriate CC register depending on byte_width
4621 Assembler::CC cc = (byte_width == 8) ? xcc : icc;
4622
4623 // Shift ary1 and ary2 to the end of the arrays, negate limit
// Afterwards Address(aryN, limit) addresses the first chunk and 'limit' counts up
// towards zero.
4624 add(ary1, limit, ary1);
4625 add(ary2, limit, ary2);
4626 neg(limit, limit);
4627
4628 // MAIN LOOP
4629 // Load and compare array elements of size 'byte_width' until the elements are not
4630 // equal or we reached the end of the arrays. If the size of the arrays is not a
4631 // multiple of 'byte_width', we simply read over the end of the array, bail out and
4632 // compare the remaining bytes below by skipping the garbage bytes.
// The ary1 chunk for the next iteration is loaded in the delay slot of the
// loop-back branch, so only the very first ary1 load sits outside the loop.
4633 load_sized_value(Address(ary1, limit), result, byte_width, false);
4634 bind(Lloop);
4635 load_sized_value(Address(ary2, limit), tmp, byte_width, false);
4636 inccc(limit, byte_width);
4637 // Bail out if we reached the end (but still do the comparison)
4638 br(Assembler::positive, false, Assembler::pn, Lremaining);
4639 delayed()->cmp(result, tmp);
4640 // Check equality of elements
4641 bp(Assembler::equal, false, cc, Assembler::pt, target(Lloop));
4642 delayed()->load_sized_value(Address(ary1, limit), result, byte_width, false);
4643
4644 ba(Ldone);
4645 delayed()->clr(result); // not equal
4646
4647 // TAIL COMPARISON
4648 // We got here because we reached the end of the arrays. 'limit' is the number of
4649 // garbage bytes we may have compared by reading over the end of the arrays. Shift
4650 // out the garbage and compare the remaining elements.
4651 bind(Lremaining);
4652 // Optimistic shortcut: elements potentially including garbage are equal
4653 bp(Assembler::equal, true, cc, Assembler::pt, target(Ldone));
4654 delayed()->mov(1, result); // equal
4655 // Shift 'limit' bytes to the right and compare
4656 sll(limit, 3, limit); // bytes to bits
4657 srlx(result, limit, result);
4658 srlx(tmp, limit, tmp);
4659 cmp(result, tmp);
// 'result' is cleared after the compare; the caller sets it to 1 if the condition
// codes say "equal".
4660 clr(result);
4661 // CC register contains result
4662 }
4663
4664 void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
4665
4666 // test for negative bytes in input string of a given size
4667 // result 1 if found, 0 otherwise.
4668
4669 Label Lcore, Ltail, Lreturn, Lcore_rpt;
4670
4671 assert_different_registers(inp, size, t2, t3, t4, t5, result);
4672
4673 Register i = result; // result used as integer index i until very end
4674 Register lmask = t2; // t2 is aliased to lmask
4675
4676 // INITIALIZATION
4677 // ===========================================================
4678 // initialize highbits mask -> lmask = 0x8080808080808080 (8B/64b)
4679 // compute unaligned offset -> i
4680 // compute core end index -> t5
4681 Assembler::sethi(0x80808000, t2); //! sethi macro fails to emit optimal
|
4534 delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
4535
4536 // If strings are equal up to min length, return the length difference.
4537 if (ae == StrIntrinsicNode::UU) {
4538 // Divide by 2 to get number of chars
4539 sra(diff, 1, result);
4540 } else {
4541 mov(diff, result);
4542 }
4543
4544 // Otherwise, return the difference between the first mismatched chars.
4545 bind(Ldone);
4546 if(ae == StrIntrinsicNode::UL) {
4547 // Negate result (see note above)
4548 neg(result);
4549 }
4550 }
4551
4552 void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
4553 Register limit, Register tmp, Register result, bool is_byte) {
4554 Label Ldone, Lword, Lmisaligned;
4555 assert_different_registers(ary1, ary2, limit, tmp, result);
4556 Register tmp2 = result; // may be used as a temp also
4557
4558 int length_offset = arrayOopDesc::length_offset_in_bytes();
4559 int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
4560 const int short_length = 16; // at this size or smaller we don't bother to use 64-bit fetches
4561
4562 if (is_array_equ) {
4563 // return true if the same array
4564 cmp(ary1, ary2);
4565 brx(Assembler::equal, true, Assembler::pn, Ldone);
4566 delayed()->mov(1, result); // equal
4567
4568 br_null(ary1, true, Assembler::pn, Ldone);
4569 delayed()->clr(result); // not equal
4570
4571 br_null(ary2, true, Assembler::pn, Ldone);
4572 delayed()->clr(result); // not equal
4573
4574 // load the lengths of arrays
4575 ld(Address(ary1, length_offset), limit);
4576 ld(Address(ary2, length_offset), tmp);
4577
4578 // return false if the two arrays are not equal length
4579 cmp(limit, tmp);
4580 br(Assembler::notEqual, true, Assembler::pn, Ldone);
4581 delayed()->clr(result); // not equal
4582 }
4583
4584 cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
4585 delayed()->mov(1, result); // zero-length arrays are equal
4586
4587 if (is_array_equ) {
4588 // load array addresses
4589 add(ary1, base_offset, ary1);
4590 add(ary2, base_offset, ary2);
4591 // set byte count
4592 if (!is_byte) {
4593 sll(limit, exact_log2(sizeof(jchar)), limit);
4594 }
4595 } else {
4596 // We have no guarantee that on 64 bit the higher half of limit is 0
4597 signx(limit);
4598 }
4599
4600 // Check for a short length (16 or less).
4601 sub(limit, short_length+1, tmp2);
4602 // Check for doubleword (8 byte) alignment of ary1 and ary2
4603 or3(ary1, ary2, tmp);
4604 srax(tmp2, 63, tmp2); // = (limit<=16) ? -1 : 0
4605 and(tmp, 7, tmp); // = (ary1%8 | ary2%8)
4606 or(tmp, tmp2, tmp);
4607
4608 br_notnull_short(tmp, Assembler::pn, Lword);
4609
4610 // Aligned, perform doubleword comparison
4611 array_equals_loop(ary1, ary2, limit, tmp, result, 8, &Ldone);
4612
4613 bind(Lword);
4614 cmp_and_brx_short(tmp, 0, Assembler::greater, Assembler::pn, Lmisaligned);
4615
4616 // Short count, perform word comparison (word alignment is guaranteed)
4617 array_equals_loop(ary1, ary2, limit, tmp, result, 4, &Ldone);
4618
4619 bind(Lmisaligned);
4620 // Unaligned doubleword comparison (word alignment is guaranteed)
4621 array_equals_loop(ary1, ary2, limit, tmp, result, 8+4, NULL);
4622
4623 bind(Ldone);
4624 }
4625
4626 // Compares two arrays in chunks of size 'byte_width'. The addresses must be aligned accordingly.
4627 void MacroAssembler::array_equals_loop(Register ary1, Register ary2, Register limit, Register tmp,
4628 Register result, size_t byte_width, Label* Ldone_or_null) {
4629 bool misaligned = (byte_width == 8+4);
4630 if (misaligned) byte_width = 8;
4631 assert(byte_width == 4 || byte_width == 8, "");
4632
4633 Register word1 = misaligned ? O7 : noreg;
4634 assert_different_registers(ary1, ary2, limit, tmp, result, word1);
4635
4636 Label Lloop, Lremaining, Lfallthrough;
4637 // Use appropriate CC register depending on byte_width
4638 Assembler::CC cc = (byte_width == 8) ? xcc : icc;
4639
4640 Label& Ldone = *((Ldone_or_null != NULL) ? Ldone_or_null : &Lfallthrough)
4641
4642 if (misaligned) {
4643 // Test for co-alignment.
4644 Label Lswap, Lskewed, Lskloop, Lcleanup, Lfallthrough;
4645 btst(4, ary2);
4646 brx(Assembler::zero, false, Assembler::pn, Lskewed);
4647 delayed()->load_sized_value(Address(ary1, 0), word1, byte_width/2, false);
4648 // if ary2 is even, then assume ary1 is odd and start the loop right away
4649
4650 // ary2 is odd, so what about ary1?
4651 btst(4, ary1);
4652 brx(Assembler::zero, false, Assembler::pn, Lswap);
4653 delayed()->load_sized_value(Address(ary2, 0), result, byte_width/2, false);
4654
4655 // Both are odd. Compare a common first word and go aligned.
4656 cmp(result, word1);
4657 // Check equality of elements
4658 bp(Assembler::notEqual, false, cc, Assembler::pn, Ldone);
4659 delayed()->clr(result); // not equal
4660
4661 add(ary1, 4, ary1);
4662 add(ary2, 4, ary2);
4663 br(Assembler::always, false, Assembler::pt, Lfallthrough);
4664 delayed()->sub(limit, 4, limit);
4665 // Finish the loop in 64-bit chunks.
4666 // (Caller is responsible to ensure that limit-4 is positive.)
4667
4668 bind(Lswap);
4669 mov(result, word1); // grab loaded half-word into correct register
4670 // ary1 is odd and ary2 is even, so swap them
4671 mov(ary1, tmp);
4672 mov(ary2, ary1);
4673 mov(tmp, ary2);
4674 // and fall through to skewed loop
4675
4676 bind(Lskewed);
4677 // - ary1 is 4 (mod 8)
4678 // - ary2 is 0 (mod 8)
4679 // - word1 (low-order 32 bits) is ((int*)ary1)[0]
4680
4681 // Shift ary1 and ary2 to the end of the arrays, negate limit
4682 add(ary1, limit, ary1);
4683 add(ary2, limit, ary2);
4684 neg(limit, limit);
4685 // Align ary1 by pushing it ahead of word1:
4686 add(ary1, byte_width/2, ary1);
4687
4688 bind(Lskloop);
4689 // SKEWED MAIN LOOP
4690 // Load and compare skewed array elements of size 8 until the elements are not
4691 // equal or we reached the end of the arrays. Loop cleanup (in the case of
4692 // a remainder of 1..7 bytes) is handled in common with the aligned loop.
4693 sllx(word1, 32, result); // put word1 payload into MSW position
4694 // we already have word1; now fetch word2 and word3 (in one 64-bit chunk)
4695 { Register word23 = word1; // reuse temp locally
4696 load_sized_value(Address(ary1, limit), word23, byte_width, false);
4697 srlx(word23, 32, tmp); // put word2 payload into LSW position
4698 or(result, tmp, result); // materialize *(unaligned long)(ary1+limit-4) = [word1|word2]
4699 // and, the LSW of word1 (= word23) now contains word3, so we are good
4700 }
4701 load_sized_value(Address(ary2, limit), tmp, byte_width, false);
4702 // To avoid accidents, ease out of this loop when we have 12 or fewer bytes left.
4703 cmp_and_br_short(limit, -(byte_with*3/2), Assembler::greaterEqual, Assembler::pn, Lofframp);
4704 cmp(result, tmp);
4705 // Check equality of elements
4706 bp(Assembler::equal, false, cc, Assembler::pt, target(Lskloop));
4707 delayed()->inc(limit, byte_width);
4708
4709 ba(Ldone);
4710 delayed()->clr(result); // not equal
4711
4712 bind(Lofframp);
4713 // limit is in the range [-12..-4], and there are 4..12 bytes left
4714 inccc(limit, byte_width);
4715 // Bail out immediately if there are 4..8 bytes left.
4716 br(Assembler::positive, false, Assembler::pn, Lremaining);
4717 delayed()->xorcc(tmp, result, tmp);
4718
4719 // There are 9..12 bytes left, so first handle the final 64-bit chunk
4720 bp(Assembler::notEqual, true, cc, Assembler::pt, Ldone);
4721 delayed()->clr(result); // not equal
4722
4723 // There are 1..4 bytes left now.
4724 sllx(word1, 32, result); // put word1 payload into MSW position
4725 load_sized_value(Address(ary2, limit), tmp, byte_width, false);
4726 // No need to increment limit by 8; only the bottom 3 bits are significant.
4727 ba(Lremaining);
4728 delayed()->xorcc(tmp, result, tmp);
4729
4730 bind(Lfallthrough);
4731 // fall through to normal case
4732 }
4733
4734 // Shift ary1 and ary2 to the end of the arrays, negate limit
4735 // (Caller is responsible to ensure that limit starts out non-zero.)
4736 add(ary1, limit, ary1);
4737 add(ary2, limit, ary2);
4738 neg(limit, limit);
4739
4740 // MAIN LOOP
4741 // Load and compare array elements of size 'byte_width' until the elements are not
4742 // equal or we reached the end of the arrays. If the size of the arrays is not a
4743 // multiple of 'byte_width', we simply read over the end of the array, bail out and
4744 // compare the remaining bytes below by skipping the garbage bytes.
4745 load_sized_value(Address(ary1, limit), result, byte_width, false);
4746 bind(Lloop);
4747 load_sized_value(Address(ary2, limit), tmp, byte_width, false);
4748 inccc(limit, byte_width);
4749 // Bail out if we reached the end (but still do the comparison)
4750 br(Assembler::positive, false, Assembler::pn, Lremaining);
4751 delayed()->xorcc(tmp, result, tmp);
4752 // Check equality of elements
4753 bp(Assembler::equal, false, cc, Assembler::pt, target(Lloop));
4754 delayed()->load_sized_value(Address(ary1, limit), result, byte_width, false);
4755
4756 ba(Ldone);
4757 delayed()->clr(result); // not equal
4758
4759 // TAIL COMPARISON
4760 // We got here because we reached the end of the arrays. 'limit' is the number of
4761 // garbage bytes we may have compared by reading over the end of the arrays. Shift
4762 // out the garbage and compare the remaining elements.
4763 // The elements are pre-compared bitwise, in that result has been xored into tmp.
4764 // Also, in the following code, only the low 2-3 bits of 'limit' are significant.
4765 // A 'limit' value of either 0 or byte_width means "preserve all bits".
4766 bind(Lremaining);
4767 // Optimistic shortcut: elements potentially including garbage are equal
4768 bp(Assembler::equal, true, cc, Assembler::pt, target(Ldone));
4769 delayed()->mov(1, result); // equal
4770 // Shift 'limit' bytes to the right and compare
4771 sll(limit, 3, limit); // bytes to bits
4772 clr(result);
4773 if (cc == icc) {
4774 srl(tmp, limit, tmp);
4775 } else {
4776 srlx(tmp, limit, tmp);
4777 }
4778 if (&Ldone != &Lfallthrough) {
4779 ba(Ldone);
4780 delayed(); // ->movr(...)
4781 }
4782 movr(tmp, Assembler::rc_z, 1, result); // may be a delay slot instruction
4783 bind(Lfallthrough);
4784 }
4785
4786 void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {
4787
4788 // test for negative bytes in input string of a given size
4789 // result 1 if found, 0 otherwise.
4790
4791 Label Lcore, Ltail, Lreturn, Lcore_rpt;
4792
4793 assert_different_registers(inp, size, t2, t3, t4, t5, result);
4794
4795 Register i = result; // result used as integer index i until very end
4796 Register lmask = t2; // t2 is aliased to lmask
4797
4798 // INITIALIZATION
4799 // ===========================================================
4800 // initialize highbits mask -> lmask = 0x8080808080808080 (8B/64b)
4801 // compute unaligned offset -> i
4802 // compute core end index -> t5
4803 Assembler::sethi(0x80808000, t2); //! sethi macro fails to emit optimal
|