< prev index next >
src/cpu/sparc/vm/macroAssembler_sparc.cpp
Print this page
*** 42,51 ****
--- 42,54 ----
#if INCLUDE_ALL_GCS
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS
+ #ifdef COMPILER2
+ #include "opto/intrinsicnode.hpp"
+ #endif
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
*** 4251,4306 ****
load_ptr_contents(base, G6_heapbase);
}
}
}
! // Compare char[] arrays aligned to 4 bytes. Emits code that branches to Ldone on a mismatch (result == 0) or when a lone trailing char was all there was to compare (result == 1); on a full word-wise match it falls through without setting result.
! void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, // the two arrays to compare
! Register limit, Register result, // limit: byte count (see note below); result: doubles as chr1
! Register chr1, Register chr2, Label& Ldone) { // chr1/chr2: scratch for the loaded values
! Label Lvector, Lloop;
! assert(chr1 == result, "should be the same");
! // Note: limit contains number of bytes (2*char_elements) != 0.
! andcc(limit, 0x2, chr1); // trailing character ? (byte count not a multiple of 4)
br(Assembler::zero, false, Assembler::pt, Lvector);
delayed()->nop();
// compare the trailing char
sub(limit, sizeof(jchar), limit);
! lduh(ary1, limit, chr1);
! lduh(ary2, limit, chr2);
! cmp(chr1, chr2);
br(Assembler::notEqual, true, Assembler::pt, Ldone);
delayed()->mov(G0, result); // not equal
! // only one char ?
cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
delayed()->add(G0, 1, result); // nothing left to compare after the trailing char: equal
// word by word compare, don't need alignment check
bind(Lvector);
// Shift ary1 and ary2 to the end of the arrays, negate limit
add(ary1, limit, ary1);
add(ary2, limit, ary2);
neg(limit, limit); // limit now counts up from -bytes towards zero
! lduw(ary1, limit, chr1);
bind(Lloop);
! lduw(ary2, limit, chr2);
! cmp(chr1, chr2);
br(Assembler::notEqual, true, Assembler::pt, Ldone);
delayed()->mov(G0, result); // not equal
inccc(limit, 2*sizeof(jchar)); // advance by one 4-byte word and set condition codes
// annul LDUW if branch is not taken to prevent access past end of array
br(Assembler::notZero, true, Assembler::pt, Lloop);
! delayed()->lduw(ary1, limit, chr1); // hoisted
! // Caller should set it:
! // add(G0, 1, result); // equals
}
// Use BIS for zeroing (count is in bytes).
void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
Register end = count;
int cache_line_size = VM_Version::prefetch_data_size();
--- 4254,4669 ----
load_ptr_contents(base, G6_heapbase);
}
}
}
! #ifdef COMPILER2
!
! // Compress char[] to byte[] by compressing 16 bytes (8 chars) at once. Return 0 in result on failure (branching to Ldone). Control falls through at the end (Lslow) when fewer than 8 chars remain or src/dst are not both 8-byte aligned.
! void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result, // cnt: number of chars; result: receives a copy of cnt
! Register tmp1, Register tmp2, Register tmp3, Register tmp4, // integer scratch registers
! FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) { // float scratch registers; Ldone: failure exit
! Label Lloop, Lslow;
! assert(UseVIS >= 3, "VIS3 is required");
! assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result);
! assert_different_registers(ftmp1, ftmp2, ftmp3);
!
! // Check if cnt >= 8 (= 16 bytes)
! cmp(cnt, 8);
! br(Assembler::less, false, Assembler::pn, Lslow);
! delayed()->mov(cnt, result); // copy count (not annulled, so this runs on both paths)
!
! // Check for 8-byte alignment of src and dst
! or3(src, dst, tmp1); // a low bit set in either address shows up in tmp1
! andcc(tmp1, 7, G0);
! br(Assembler::notZero, false, Assembler::pn, Lslow);
! delayed()->nop();
!
! // Set mask for bshuffle instruction
! Register mask = tmp4;
! set(0x13579bdf, mask);
! bmask(mask, G0, G0);
!
! // Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters
! Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00
! add(mask, 0x300, mask); // mask = 0x0000 0000 ff00 ff00
! sllx(mask, 32, tmp1); // tmp1 = 0xff00 ff00 0000 0000
! or3(mask, tmp1, mask); // mask = 0xff00 ff00 ff00 ff00
!
! // Load first 8 bytes
! ldx(src, 0, tmp1);
!
! bind(Lloop);
! // Load next 8 bytes
! ldx(src, 8, tmp2);
!
! // Check for non-latin1 character by testing if the most significant byte of a char is set.
! // Although we have to move the data between integer and floating point registers, this is
! // still faster than the corresponding VIS instructions (ford/fand/fcmpd).
! or3(tmp1, tmp2, tmp3); // combine all 8 chars so one test covers them
! btst(tmp3, mask);
! // annul zeroing if branch is not taken to preserve original count
! brx(Assembler::notZero, true, Assembler::pn, Ldone);
! delayed()->mov(G0, result); // 0 - failed
!
! // Move bytes into float register
! movxtod(tmp1, ftmp1);
! movxtod(tmp2, ftmp2);
!
! // Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3
! bshuffle(ftmp1, ftmp2, ftmp3);
! stf(FloatRegisterImpl::D, ftmp3, dst, 0);
!
! // Increment addresses and decrement count
! inc(src, 16); // 8 chars consumed
! inc(dst, 8); // 8 bytes produced
! dec(cnt, 8);
!
! cmp(cnt, 8);
! // annul LDX if branch is not taken to prevent access past end of string
! br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
! delayed()->ldx(src, 0, tmp1);
!
! // Fallback to slow version
! bind(Lslow); // bound at the end: whatever the caller emits next handles the remaining chars
! }
!
! // Compress char[] to byte[], one char per iteration. Return 0 in result on failure (a char greater than 0xff), branching to Ldone; falls through once cnt reaches zero. NOTE(review): the loop loads and stores one char before testing cnt, so it appears to assume cnt > 0 on entry - confirm with callers.
! void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) {
! Label Lloop;
! assert_different_registers(src, dst, cnt, tmp, result);
!
! lduh(src, 0, tmp); // first char; later chars are loaded in the loop's delay slot
!
! bind(Lloop);
! inc(src, sizeof(jchar));
! cmp(tmp, 0xff);
! // annul zeroing if branch is not taken to preserve original count
! br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc
! delayed()->mov(G0, result); // 0 - failed
! deccc(cnt); // sets the condition codes tested by the loop branch below
! stb(tmp, dst, 0); // store the low byte of the char
! inc(dst);
! // annul LDUH if branch is not taken to prevent access past end of string
! br(Assembler::notZero, true, Assembler::pt, Lloop);
! delayed()->lduh(src, 0, tmp); // hoisted
! }
!
! // Inflate byte[] to char[] by inflating 8 source bytes into 16 destination bytes per iteration. Control falls through at the end (Lslow) when fewer than 8 bytes remain or src/dst are not both 8-byte aligned. Ldone is not referenced by this routine.
! void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp, // cnt: number of source bytes; tmp: integer scratch
! FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) { // float scratch registers; ftmp4 holds zero
! Label Lloop, Lslow;
! assert(UseVIS >= 3, "VIS3 is required");
! assert_different_registers(src, dst, cnt, tmp);
! assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4);
!
! // Check if cnt >= 8 (= 16 bytes)
! cmp(cnt, 8);
! br(Assembler::less, false, Assembler::pn, Lslow);
! delayed()->nop();
!
! // Check for 8-byte alignment of src and dst
! or3(src, dst, tmp); // a low bit set in either address shows up in tmp
! andcc(tmp, 7, G0);
! br(Assembler::notZero, false, Assembler::pn, Lslow);
! // Initialize float register to zero
! FloatRegister zerof = ftmp4;
! delayed()->fzero(FloatRegisterImpl::D, zerof); // delay slot, not annulled: runs on both paths
!
! // Load first 8 bytes
! ldf(FloatRegisterImpl::D, src, 0, ftmp1);
!
! bind(Lloop);
! inc(src, 8);
! dec(cnt, 8);
!
! // Inflate the string by interleaving each byte from the source array
! // with a zero byte and storing the result in the destination array.
! fpmerge(zerof, ftmp1->successor(), ftmp2); // presumably the second 4 source bytes (other half of the double) - confirm against VIS docs
! stf(FloatRegisterImpl::D, ftmp2, dst, 8); // upper 8 destination bytes
! fpmerge(zerof, ftmp1, ftmp3);
! stf(FloatRegisterImpl::D, ftmp3, dst, 0); // lower 8 destination bytes
!
! inc(dst, 16);
!
! cmp(cnt, 8);
! // annul LDF if branch is not taken to prevent access past end of string
! br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
! delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1);
!
! // Fallback to slow version
! bind(Lslow); // bound at the end: whatever the caller emits next handles the remaining bytes
! }
!
! // Inflate byte[] to char[], one byte per iteration; falls through once cnt reaches zero. Ldone is not referenced by this routine. NOTE(review): the loop loads and stores one element before testing cnt, so it appears to assume cnt > 0 on entry - confirm with callers.
! void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) {
! Label Loop;
! assert_different_registers(src, dst, cnt, tmp);
!
! ldub(src, 0, tmp); // first byte; later bytes are loaded in the loop's delay slot
! bind(Loop);
! inc(src);
! deccc(cnt); // sets the condition codes tested by the loop branch below
! sth(tmp, dst, 0); // zero-extended byte stored as a 2-byte char
! inc(dst, sizeof(jchar));
! // annul LDUB if branch is not taken to prevent access past end of string
! br(Assembler::notZero, true, Assembler::pt, Loop);
! delayed()->ldub(src, 0, tmp); // hoisted
! }
! // Compare two strings; ae selects the encoding pair (LL, UU, LU or UL). result receives the difference of the first mismatching characters, or the length difference when the strings agree up to the shorter length (negated in the UL case).
! void MacroAssembler::string_compare(Register str1, Register str2,
! Register cnt1, Register cnt2,
! Register tmp1, Register tmp2, // tmp1 holds the length difference; tmp2 is only used for LU/UL
! Register result, int ae) {
! Label Ldone, Lloop;
! assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result);
! int stride1, stride2;
!
! // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
! // we interchange str1 and str2 in the UL case and negate the result.
! // Like this, str1 is always latin1 encoded, except for the UU case.
!
! if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
! srl(cnt2, 1, cnt2); // halve cnt2 (presumably byte count -> char count for the UTF16 string - confirm)
! }
!
! // See if the lengths are different, and calculate min in cnt1.
! // Save diff in case we need it for a tie-breaker.
! Label Lskip;
! Register diff = tmp1;
! subcc(cnt1, cnt2, diff);
! br(Assembler::greater, true, Assembler::pt, Lskip); // annulled: the mov below runs only when cnt1 > cnt2
! // cnt2 is shorter, so use its count:
! delayed()->mov(cnt2, cnt1);
! bind(Lskip);
!
! // Rename registers
! Register limit1 = cnt1;
! Register limit2 = limit1; // same register unless the encodings differ (see below)
! Register chr1 = result;
! Register chr2 = cnt2;
! if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
! // We need an additional register to keep track of two limits
! assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
! limit2 = tmp2;
! }
!
! // Is the minimum length zero?
! cmp(limit1, (int)0); // use cast to resolve overloading ambiguity
! br(Assembler::equal, true, Assembler::pn, Ldone);
! // result is difference in lengths
! if (ae == StrIntrinsicNode::UU) {
! delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
! } else {
! delayed()->mov(diff, result);
! }
!
! // Load first characters
! if (ae == StrIntrinsicNode::LL) {
! stride1 = stride2 = sizeof(jbyte);
! ldub(str1, 0, chr1);
! ldub(str2, 0, chr2);
! } else if (ae == StrIntrinsicNode::UU) {
! stride1 = stride2 = sizeof(jchar);
! lduh(str1, 0, chr1);
! lduh(str2, 0, chr2);
! } else {
! stride1 = sizeof(jbyte);
! stride2 = sizeof(jchar);
! ldub(str1, 0, chr1);
! lduh(str2, 0, chr2);
! }
!
! // Compare first characters
! subcc(chr1, chr2, chr1); // chr1 is result, so a non-zero difference is already the return value
! br(Assembler::notZero, false, Assembler::pt, Ldone);
! assert(chr1 == result, "result must be pre-placed");
! delayed()->nop();
!
! // Check if the strings start at same location
! cmp(str1, str2);
! brx(Assembler::equal, true, Assembler::pn, Ldone);
! delayed()->mov(G0, result); // result is zero
!
! // We have no guarantee that on 64 bit the higher half of limit is 0
! signx(limit1);
!
! // Get limit
! if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
! sll(limit1, 1, limit2); // limit2 = 2 * limit1: the second string is walked with a 2-byte stride
! subcc(limit2, stride2, chr2);
! }
! subcc(limit1, stride1, chr1); // bytes left after the first character; zero means nothing more to compare
! br(Assembler::zero, true, Assembler::pn, Ldone);
! // result is difference in lengths
! if (ae == StrIntrinsicNode::UU) {
! delayed()->sra(diff, 1, result); // Divide by 2 to get number of chars
! } else {
! delayed()->mov(diff, result);
! }
!
! // Shift str1 and str2 to the end of the arrays, negate limit
! add(str1, limit1, str1);
! add(str2, limit2, str2);
! neg(chr1, limit1); // limit1 = -(limit1-stride1)
! if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
! neg(chr2, limit2); // limit2 = -(limit2-stride2)
! }
!
! // Compare the rest of the characters
! if (ae == StrIntrinsicNode::UU) {
! lduh(str1, limit1, chr1);
! } else {
! ldub(str1, limit1, chr1);
! }
!
! bind(Lloop);
! if (ae == StrIntrinsicNode::LL) {
! ldub(str2, limit2, chr2);
! } else {
! lduh(str2, limit2, chr2);
! }
!
! subcc(chr1, chr2, chr1);
! br(Assembler::notZero, false, Assembler::pt, Ldone);
! assert(chr1 == result, "result must be pre-placed");
! delayed()->inccc(limit1, stride1); // not annulled: advances limit1 and sets the condition codes for the loop branch
! if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
! inccc(limit2, stride2);
! }
!
! // annul LDUB/LDUH if branch is not taken to prevent access past end of string
! br(Assembler::notZero, true, Assembler::pt, Lloop);
! if (ae == StrIntrinsicNode::UU) {
! delayed()->lduh(str1, limit2, chr1); // limit2 is the same register as limit1 in the UU case
! } else {
! delayed()->ldub(str1, limit1, chr1);
! }
! // If strings are equal up to min length, return the length difference.
! if (ae == StrIntrinsicNode::UU) {
! // Divide by 2 to get number of chars
! sra(diff, 1, result);
! } else {
! mov(diff, result);
! }
!
! // Otherwise, return the difference between the first mismatched chars.
! bind(Ldone);
! if(ae == StrIntrinsicNode::UL) {
! // Negate result (see note above)
! neg(result);
! }
! }
! // Compare two byte or char sequences for equality; result is set to 1 if equal and 0 otherwise. With is_array_equ the arguments are array oops (identity, null and length are checked here and limit is loaded from ary1); otherwise limit is supplied by the caller.
! void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
! Register limit, Register tmp, Register result, bool is_byte) { // tmp and result double as scratch for the loaded values
! Label Ldone, Lvector, Lloop;
! assert_different_registers(ary1, ary2, limit, tmp, result);
!
! int length_offset = arrayOopDesc::length_offset_in_bytes();
! int base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
!
! if (is_array_equ) {
! // return true if the same array
! cmp(ary1, ary2);
! brx(Assembler::equal, true, Assembler::pn, Ldone);
! delayed()->add(G0, 1, result); // equal
!
! br_null(ary1, true, Assembler::pn, Ldone);
! delayed()->mov(G0, result); // not equal
!
! br_null(ary2, true, Assembler::pn, Ldone);
! delayed()->mov(G0, result); // not equal
!
! // load the lengths of arrays
! ld(Address(ary1, length_offset), limit);
! ld(Address(ary2, length_offset), tmp);
!
! // return false if the two arrays are not equal length
! cmp(limit, tmp);
! br(Assembler::notEqual, true, Assembler::pn, Ldone);
! delayed()->mov(G0, result); // not equal
! }
!
! cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
! delayed()->add(G0, 1, result); // zero-length arrays are equal
!
! if (is_array_equ) {
! // load array addresses
! add(ary1, base_offset, ary1);
! add(ary2, base_offset, ary2);
! } else {
! // We have no guarantee that on 64 bit the higher half of limit is 0
! signx(limit);
! }
!
! if (is_byte) {
! Label Lskip;
! // check for trailing byte
! andcc(limit, 0x1, tmp);
! br(Assembler::zero, false, Assembler::pt, Lskip);
! delayed()->nop();
!
! // compare the trailing byte
! sub(limit, sizeof(jbyte), limit);
! ldub(ary1, limit, result);
! ldub(ary2, limit, tmp);
! cmp(result, tmp);
! br(Assembler::notEqual, true, Assembler::pt, Ldone);
! delayed()->mov(G0, result); // not equal
!
! // only one byte?
! cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
! delayed()->add(G0, 1, result); // nothing left to compare after the trailing byte: equal
! bind(Lskip);
! } else if (is_array_equ) {
! // set byte count
! sll(limit, exact_log2(sizeof(jchar)), limit); // array length is in chars; the code below works on bytes
! }
!
! // check for trailing character
! andcc(limit, 0x2, tmp);
br(Assembler::zero, false, Assembler::pt, Lvector);
delayed()->nop();
// compare the trailing char
sub(limit, sizeof(jchar), limit);
! lduh(ary1, limit, result);
! lduh(ary2, limit, tmp);
! cmp(result, tmp);
br(Assembler::notEqual, true, Assembler::pt, Ldone);
delayed()->mov(G0, result); // not equal
! // only one char?
cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
delayed()->add(G0, 1, result); // nothing left to compare after the trailing char: equal
// word by word compare, don't need alignment check
bind(Lvector);
// Shift ary1 and ary2 to the end of the arrays, negate limit
add(ary1, limit, ary1);
add(ary2, limit, ary2);
neg(limit, limit); // limit now counts up from -bytes towards zero
! lduw(ary1, limit, result);
bind(Lloop);
! lduw(ary2, limit, tmp);
! cmp(result, tmp);
br(Assembler::notEqual, true, Assembler::pt, Ldone);
delayed()->mov(G0, result); // not equal
inccc(limit, 2*sizeof(jchar)); // advance by one 4-byte word and set condition codes
// annul LDUW if branch is not taken to prevent access past end of array
br(Assembler::notZero, true, Assembler::pt, Lloop);
! delayed()->lduw(ary1, limit, result); // hoisted
! add(G0, 1, result); // equals
! bind(Ldone);
}
+ #endif
+
// Use BIS for zeroing (count is in bytes).
void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
Register end = count;
int cache_line_size = VM_Version::prefetch_data_size();
< prev index next >