--- old/src/cpu/sparc/vm/macroAssembler_sparc.cpp 2016-04-19 11:15:37.499577855 +0200
+++ new/src/cpu/sparc/vm/macroAssembler_sparc.cpp 2016-04-19 11:15:37.435577858 +0200
@@ -4516,18 +4516,10 @@
   }
 
   // Compare the rest of the characters
-  if (ae == StrIntrinsicNode::UU) {
-    lduh(str1, limit1, chr1);
-  } else {
-    ldub(str1, limit1, chr1);
-  }
+  load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
 
   bind(Lloop);
-  if (ae == StrIntrinsicNode::LL) {
-    ldub(str2, limit2, chr2);
-  } else {
-    lduh(str2, limit2, chr2);
-  }
+  load_sized_value(Address(str2, limit2), chr2, (ae == StrIntrinsicNode::LL) ? 1 : 2, false);
 
   subcc(chr1, chr2, chr1);
   br(Assembler::notZero, false, Assembler::pt, Ldone);
@@ -4539,11 +4531,7 @@
 
   // annul LDUB if branch is not taken to prevent access past end of string
   br(Assembler::notZero, true, Assembler::pt, Lloop);
-  if (ae == StrIntrinsicNode::UU) {
-    delayed()->lduh(str1, limit2, chr1);
-  } else {
-    delayed()->ldub(str1, limit1, chr1);
-  }
+  delayed()->load_sized_value(Address(str1, limit1), chr1, (ae == StrIntrinsicNode::UU) ? 2 : 1, false);
 
   // If strings are equal up to min length, return the length difference.
   if (ae == StrIntrinsicNode::UU) {
@@ -4563,7 +4551,7 @@
 
 void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
                                   Register limit, Register tmp, Register result, bool is_byte) {
-  Label Ldone, Lvector, Lloop;
+  Label Ldone, Lslow, Lsmall;
   assert_different_registers(ary1, ary2, limit, tmp, result);
 
   int length_offset = arrayOopDesc::length_offset_in_bytes();
@@ -4573,13 +4561,13 @@
     // return true if the same array
     cmp(ary1, ary2);
     brx(Assembler::equal, true, Assembler::pn, Ldone);
-    delayed()->add(G0, 1, result); // equal
+    delayed()->mov(1, result); // equal
 
     br_null(ary1, true, Assembler::pn, Ldone);
-    delayed()->mov(G0, result); // not equal
+    delayed()->clr(result); // not equal
 
     br_null(ary2, true, Assembler::pn, Ldone);
-    delayed()->mov(G0, result); // not equal
+    delayed()->clr(result); // not equal
 
     // load the lengths of arrays
     ld(Address(ary1, length_offset), limit);
@@ -4588,82 +4576,114 @@
     // return false if the two arrays are not equal length
     cmp(limit, tmp);
     br(Assembler::notEqual, true, Assembler::pn, Ldone);
-    delayed()->mov(G0, result); // not equal
+    delayed()->clr(result); // not equal
   }
 
   cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
-  delayed()->add(G0, 1, result); // zero-length arrays are equal
+  delayed()->mov(1, result); // zero-length arrays are equal
 
   if (is_array_equ) {
     // load array addresses
     add(ary1, base_offset, ary1);
     add(ary2, base_offset, ary2);
+    // set byte count
+    if (!is_byte) {
+      sll(limit, exact_log2(sizeof(jchar)), limit);
+    }
   } else {
     // We have no guarantee that on 64 bit the higher half of limit is 0
     signx(limit);
   }
 
-  if (is_byte) {
-    Label Lskip;
-    // check for trailing byte
-    andcc(limit, 0x1, tmp);
-    br(Assembler::zero, false, Assembler::pt, Lskip);
-    delayed()->nop();
-
-    // compare the trailing byte
-    sub(limit, sizeof(jbyte), limit);
-    ldub(ary1, limit, result);
-    ldub(ary2, limit, tmp);
-    cmp(result, tmp);
-    br(Assembler::notEqual, true, Assembler::pt, Ldone);
-    delayed()->mov(G0, result); // not equal
-
-    // only one byte?
-    cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
-    delayed()->add(G0, 1, result); // zero-length arrays are equal
-    bind(Lskip);
-  } else if (is_array_equ) {
-    // set byte count
-    sll(limit, exact_log2(sizeof(jchar)), limit);
-  }
+  // Check for small arrays with size <= 4 bytes
+  cmp_and_br_short(limit, 4, Assembler::lessEqual, Assembler::pn, Lsmall);
 
-  // check for trailing character
-  andcc(limit, 0x2, tmp);
-  br(Assembler::zero, false, Assembler::pt, Lvector);
-  delayed()->nop();
-
-  // compare the trailing char
-  sub(limit, sizeof(jchar), limit);
-  lduh(ary1, limit, result);
-  lduh(ary2, limit, tmp);
-  cmp(result, tmp);
-  br(Assembler::notEqual, true, Assembler::pt, Ldone);
-  delayed()->mov(G0, result); // not equal
+  // Check for doubleword (8 byte) alignment of ary1 and ary2
+  or3(ary1, ary2, tmp);
+  andcc(tmp, 7, tmp);
+  br_notnull_short(tmp, Assembler::pn, Lslow);
+
+  // Aligned, perform doubleword comparison
+  array_equals_core(ary1, ary2, limit, tmp, result, 8, true, Ldone);
+  ba(Ldone);
+  delayed()->movcc(Assembler::equal, false, xcc, 1, result);
+
+  // Unaligned, perform word comparison (word alignment is guaranteed)
+  bind(Lslow);
+  array_equals_core(ary1, ary2, limit, tmp, result, 4, true, Ldone);
+  ba(Ldone);
+  delayed()->movcc(Assembler::equal, false, icc, 1, result);
+
+  // Small, perform word comparison without loop
+  bind(Lsmall);
+  array_equals_core(ary1, ary2, limit, tmp, result, 4, false, Ldone);
+  movcc(Assembler::equal, false, icc, 1, result);
 
-  // only one char?
-  cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
-  delayed()->add(G0, 1, result); // zero-length arrays are equal
-
-  // word by word compare, dont't need alignment check
-  bind(Lvector);
-  // Shift ary1 and ary2 to the end of the arrays, negate limit
-  add(ary1, limit, ary1);
-  add(ary2, limit, ary2);
-  neg(limit, limit);
+  bind(Ldone);
+}
 
-  lduw(ary1, limit, result);
-  bind(Lloop);
-  lduw(ary2, limit, tmp);
-  cmp(result, tmp);
-  br(Assembler::notEqual, true, Assembler::pt, Ldone);
-  delayed()->mov(G0, result); // not equal
-  inccc(limit, 2*sizeof(jchar));
-  // annul LDUW if branch is not taken to prevent access past end of array
-  br(Assembler::notZero, true, Assembler::pt, Lloop);
-  delayed()->lduw(ary1, limit, result); // hoisted
+// Compares two arrays in chunks of size 'byte_width'. The addresses must be aligned accordingly.
+// 'limit' is the size of the arrays in bytes. If 'loop' is false, only a single chunk is
+// compared, so the arrays must not be larger than 'byte_width' bytes.
+// In loop mode, branches to 'Ldone' with 'result' set if the outcome is known early.
+// Otherwise falls through with 'result' cleared and the CC register holding the outcome
+// of the last comparison ('equal' if the arrays are equal).
+void MacroAssembler::array_equals_core(Register ary1, Register ary2, Register limit, Register tmp,
+                                       Register result, size_t byte_width, bool loop, Label& Ldone) {
+  Label Lloop, Lremaining;
+  // Use appropriate CC register depending on byte_width
+  Assembler::CC cc = (byte_width == 8) ? xcc : icc;
+
+  if (loop) {
+    // Shift ary1 and ary2 to the end of the arrays, negate limit
+    add(ary1, limit, ary1);
+    add(ary2, limit, ary2);
+    neg(limit, limit);
+
+    // MAIN LOOP
+    // Load and compare array elements of size 'byte_width' until the elements are not
+    // equal or we reached the end of the arrays. If the size of the arrays is not a
+    // multiple of 'byte_width', we simply read over the end of the array, bail out and
+    // compare the remaining bytes below by skipping the garbage bytes.
+    load_sized_value(Address(ary1, limit), result, byte_width, false);
+    bind(Lloop);
+    load_sized_value(Address(ary2, limit), tmp, byte_width, false);
+    inccc(limit, byte_width);
+    // Bail out if we reached the end (but still do the comparison)
+    br(Assembler::positive, false, Assembler::pn, Lremaining);
+    delayed()->cmp(result, tmp);
+    // Check equality of elements
+    bp(Assembler::equal, false, cc, Assembler::pt, target(Lloop));
+    delayed()->load_sized_value(Address(ary1, limit), result, byte_width, false);
 
-  add(G0, 1, result); // equals
-  bind(Ldone);
+    ba(Ldone);
+    delayed()->clr(result); // not equal
+  } else {
+    load_sized_value(Address(ary1, 0), result, byte_width, false);
+    load_sized_value(Address(ary2, 0), tmp, byte_width, false);
+    // 'limit' still holds the byte count here (it was not negated as in the loop
+    // case), so turn it into the garbage byte count: byte_width - limit.
+    neg(limit, limit);
+    inc(limit, byte_width);
+  }
+
+  // TAIL COMPARISON
+  // We got here because we reached the end of the arrays. 'limit' is the number of
+  // garbage bytes we may have compared by reading over the end of the arrays. Shift
+  // out the garbage and compare the remaining elements.
+  bind(Lremaining);
+  if (loop) {
+    // Optimistic shortcut: elements potentially including garbage are equal
+    bp(Assembler::equal, true, cc, Assembler::pt, target(Ldone));
+    delayed()->mov(1, result); // equal
+  }
+  // Shift 'limit' bytes to the right and compare
+  sll(limit, 3, limit); // bytes to bits
+  srlx(result, limit, result);
+  srlx(tmp, limit, tmp);
+  cmp(result, tmp);
+  clr(result);
+  // CC register contains result
 }
 
 void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5) {