--- old/src/cpu/sparc/vm/macroAssembler_sparc.cpp 2016-04-20 10:48:17.103661309 +0200 +++ new/src/cpu/sparc/vm/macroAssembler_sparc.cpp 2016-04-20 10:48:17.031661312 +0200 @@ -4516,18 +4516,10 @@ } // Compare the rest of the characters - if (ae == StrIntrinsicNode::UU) { - lduh(str1, limit1, chr1); - } else { - ldub(str1, limit1, chr1); - } + load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false); bind(Lloop); - if (ae == StrIntrinsicNode::LL) { - ldub(str2, limit2, chr2); - } else { - lduh(str2, limit2, chr2); - } + load_sized_value(Address(str2, limit2), chr2, (ae == StrIntrinsicNode::LL) ? 1 : 2, false); subcc(chr1, chr2, chr1); br(Assembler::notZero, false, Assembler::pt, Ldone); @@ -4539,11 +4531,7 @@ // annul LDUB if branch is not taken to prevent access past end of string br(Assembler::notZero, true, Assembler::pt, Lloop); - if (ae == StrIntrinsicNode::UU) { - delayed()->lduh(str1, limit2, chr1); - } else { - delayed()->ldub(str1, limit1, chr1); - } + delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false); // If strings are equal up to min length, return the length difference. if (ae == StrIntrinsicNode::UU) { @@ -4563,7 +4551,7 @@ void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit, Register tmp, Register result, bool is_byte) { - Label Ldone, Lvector, Lloop; + Label Ldone, Lunaligned; assert_different_registers(ary1, ary2, limit, tmp, result); int length_offset = arrayOopDesc::length_offset_in_bytes(); @@ -4573,13 +4561,13 @@ // return true if the same array cmp(ary1, ary2); brx(Assembler::equal, true, Assembler::pn, Ldone); - delayed()->add(G0, 1, result); // equal + delayed()->mov(1, result); // equal br_null(ary1, true, Assembler::pn, Ldone); - delayed()->mov(G0, result); // not equal + delayed()->clr(result); // not equal br_null(ary2, true, Assembler::pn, Ldone); - delayed()->mov(G0, result); // not equal + delayed()->clr(result); // not equal // load the lengths of arrays ld(Address(ary1, length_offset), limit); @@ -4588,82 +4576,100 @@ // return false if the two arrays are not equal length cmp(limit, tmp); br(Assembler::notEqual, true, Assembler::pn, Ldone); - delayed()->mov(G0, result); // not equal + delayed()->clr(result); // not equal } cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn); - delayed()->add(G0, 1, result); // zero-length arrays are equal + delayed()->mov(1, result); // zero-length arrays are equal if (is_array_equ) { // load array addresses add(ary1, base_offset, ary1); add(ary2, base_offset, ary2); + // set byte count + if (!is_byte) { + sll(limit, exact_log2(sizeof(jchar)), limit); + } } else { // We have no guarantee that on 64 bit the higher half of limit is 0 signx(limit); } - if (is_byte) { - Label Lskip; - // check for trailing byte - andcc(limit, 0x1, tmp); - br(Assembler::zero, false, Assembler::pt, Lskip); - delayed()->nop(); - - // compare the trailing byte - sub(limit, sizeof(jbyte), limit); - ldub(ary1, limit, result); - ldub(ary2, limit, tmp); - cmp(result, tmp); - br(Assembler::notEqual, true, Assembler::pt, Ldone); - delayed()->mov(G0, result); // not equal - - // only one byte? - cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn); - delayed()->add(G0, 1, result); // zero-length arrays are equal - bind(Lskip); - } else if (is_array_equ) { - // set byte count - sll(limit, exact_log2(sizeof(jchar)), limit); - } + // Check for doubleword (8 byte) alignment of ary1 and ary2 + or3(ary1, ary2, tmp); + and3(tmp, 7, tmp); + br_notnull_short(tmp, Assembler::pn, Lunaligned); + + // Aligned, perform doubleword comparison + array_equals_loop(ary1, ary2, limit, tmp, result, 8, &Ldone); + + // Misaligned, perform word comparison (word alignment is guaranteed) + bind(Lunaligned); + array_equals_loop(ary1, ary2, limit, tmp, result, 4, NULL /* fall through */); + + bind(Ldone); +} + +// Compares two arrays in chunks of size 'byte_width'. The addresses must be aligned accordingly. +void MacroAssembler::array_equals_loop(Register ary1, Register ary2, Register limit, Register tmp, + Register result, size_t byte_width, Label* Ldone_or_null) { + Label Lloop, Lremaining, Lfallthrough; + // Fall through if the given label is NULL + Label& Ldone = (Ldone_or_null != NULL) ? *Ldone_or_null : Lfallthrough; - // check for trailing character - andcc(limit, 0x2, tmp); - br(Assembler::zero, false, Assembler::pt, Lvector); - delayed()->nop(); - - // compare the trailing char - sub(limit, sizeof(jchar), limit); - lduh(ary1, limit, result); - lduh(ary2, limit, tmp); - cmp(result, tmp); - br(Assembler::notEqual, true, Assembler::pt, Ldone); - delayed()->mov(G0, result); // not equal - - // only one char? - cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn); - delayed()->add(G0, 1, result); // zero-length arrays are equal + // Use appropriate CC register depending on byte_width + Assembler::CC cc = (byte_width == 8) ? xcc : icc; - // word by word compare, dont't need alignment check - bind(Lvector); // Shift ary1 and ary2 to the end of the arrays, negate limit add(ary1, limit, ary1); add(ary2, limit, ary2); neg(limit, limit); - lduw(ary1, limit, result); + // MAIN LOOP + // Load and compare array elements of size 'byte_width' until the elements are not + // equal or we reached the end of the arrays. If the size of the arrays is not a + // multiple of 'byte_width', we simply read over the end of the array, bail out and + // compare the remaining bytes below by skipping the garbage bytes. + load_sized_value(Address(ary1, limit), result, byte_width, false); bind(Lloop); - lduw(ary2, limit, tmp); - cmp(result, tmp); - br(Assembler::notEqual, true, Assembler::pt, Ldone); - delayed()->mov(G0, result); // not equal - inccc(limit, 2*sizeof(jchar)); - // annul LDUW if branch is not taken to prevent access past end of array - br(Assembler::notZero, true, Assembler::pt, Lloop); - delayed()->lduw(ary1, limit, result); // hoisted + load_sized_value(Address(ary2, limit), tmp, byte_width, false); + inccc(limit, byte_width); + // Bail out if we reached the end (but still do the comparison) + br(Assembler::positive, false, Assembler::pn, Lremaining); + delayed()->xorcc(tmp, result, tmp); + // Check equality of elements + bp(Assembler::equal, false, cc, Assembler::pt, target(Lloop)); + delayed()->load_sized_value(Address(ary1, limit), result, byte_width, false); + + ba(Ldone); + delayed()->clr(result); // not equal + + // TAIL COMPARISON + // We got here because we reached the end of the arrays. 'limit' is the number of + // garbage bytes we may have compared by reading over the end of the arrays. Shift + // out the garbage and compare the remaining elements. A 'limit' value of 0 means + // "preserve all bits". The elements are pre-compared bitwise, in that result has + // been xored into tmp. + bind(Lremaining); + // Optimistic shortcut: elements potentially including garbage are equal + bp(Assembler::equal, true, cc, Assembler::pt, target(Ldone)); + delayed()->mov(1, result); // equal + sll(limit, 3, limit); // bytes to bits + clr(result); + // Shift 'limit' bytes to the right and compare + if (cc == icc) { + srl(tmp, limit, tmp); + } else { + srlx(tmp, limit, tmp); + } + // Fall through if we are at the end of the intrinsic + if (Ldone_or_null != NULL) { + ba(Ldone); + delayed(); // ->movr(...) + } + movr(Assembler::rc_z, tmp, 1, result); // may be a delay slot instruction - add(G0, 1, result); // equals - bind(Ldone); + bind(Lfallthrough); } void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) { --- old/src/cpu/sparc/vm/macroAssembler_sparc.hpp 2016-04-20 10:48:17.363661297 +0200 +++ new/src/cpu/sparc/vm/macroAssembler_sparc.hpp 2016-04-20 10:48:17.295661300 +0200 @@ -1392,6 +1392,11 @@ void array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit, Register tmp, Register result, bool is_byte); + + // Compares two arrays in chunks of size 'byte_width'. The addresses must be aligned accordingly. + void array_equals_loop(Register ary1, Register ary2, Register limit, Register tmp, + Register result, size_t byte_width, Label* Ldone_or_null); + // test for negative bytes in input string of a given size, result 0 if none void has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4,