--- old/src/cpu/sparc/vm/macroAssembler_sparc.cpp 2016-03-02 09:32:02.143873208 +0100
+++ new/src/cpu/sparc/vm/macroAssembler_sparc.cpp 2016-03-02 09:32:02.083873210 +0100
@@ -4666,8 +4666,117 @@
   bind(Ldone);
 }
 
+void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5, Register t6) {
+
+  // Test for negative bytes (high bit set) in the 'size' bytes starting at
+  // address 'inp'; the input is scanned one aligned 8B word at a time.
+  //   inp    - address of the first byte, need not be 8B aligned (only read)
+  //   size   - number of bytes to examine (only read)
+  //   result - 1 if a negative byte is found, 0 otherwise (also 0 if size <= 0)
+  //   t2..t6 - temporaries, may be clobbered
+  // All eight registers must be distinct (asserted below).
+
+  Label Lcore, Ltail, Lreturn, Lcore_rpt;
+
+  assert_different_registers(inp, size, t2, t3, t4, t5, t6, result);
+
+  Register i     = result;  // result used as integer index i until very end
+  Register lmask = t2;      // t2 is aliased to lmask
+
+  // INITIALIZATION
+  // ===========================================================
+  // initialize highbits mask -> lmask = 0x8080808080808080  (8B/64b)
+  // compute unaligned offset -> i
+  // compute core end index   -> t6
+  Assembler::sethi(0x80808000, t3);   //! sethi macro fails to emit optimal
+  add(t3, 0x80, t2);
+  sllx(t2, 32, t5);
+  or3(t5, t2, lmask);                 // 0x8080808080808080 -> lmask
+  andcc(inp, 0x7, i);                 // unaligned offset -> i
+  br(Assembler::zero, true, Assembler::pn, Lcore); // starts 8B aligned?
+  delayed()->add(size, -8, t6);       // (annulled) core end index -> t6
+
+  // ===========================================================
+
+  // UNALIGNED HEAD
+  // ===========================================================
+  // * unaligned head handling: grab aligned 8B containing unaligned inp(ut)
+  // * obliterate (ignore) bytes outside string by shifting off reg ends
+  // * compare with bitmask, short circuit return true if one or more high
+  //   bits set.
+  // * an empty input must return 0 before any masking: with size == 0 the
+  //   left shift below would be 64 bits, and sllx only honours the low six
+  //   bits of its count, so the bytes preceding inp would stay unmasked.
+  cmp(size, 0);
+  br(Assembler::lessEqual, true, Assembler::pn, Lreturn); // nothing to scan?
+  delayed()->mov(0, result);     // annulled so i not clobbered for following
+  neg(i, t3);
+  add(i, size, t5);
+  sra(t3, 0, t4);
+  ldx(inp, t4, t3);  // raw aligned 8B containing unaligned head -> t3
+  mov(8, t4);
+  sub(t4, t5, t4);
+  sra(t4, 31, t6);
+  andn(t4, t6, t5);
+  add(i, t5, t4);
+  sll(t5, 3, t5);
+  sll(t4, 3, t6);   // # bits to shift right, left -> t5,t6
+  srlx(t3, t5, t4);
+  sllx(t4, t6, t3); // bytes outside string in 8B header obliterated -> t3
+  andcc(lmask, t3, G0);
+  brx(Assembler::notZero, true, Assembler::pn, Lreturn); // short circuit?
+  delayed()->mov(1,result);      // annulled so i not clobbered for following
+  add(size, -8, t6);             // core end index -> t6
+  mov(8, t4);
+  sub(t4, i, i);                 // # bytes examined in unaligned head (<8) -> i
+  // ===========================================================
+
+  // ALIGNED CORE
+  // ===========================================================
+  // * iterate index i over aligned 8B sections of core, comparing with
+  //   bitmask, short circuit return true if one or more high bits set
+  // t6 contains core end index/loop limit which is the index
+  //     of the MSB of last (unaligned) 8B fully contained in the string.
+  // inp   contains address of first byte in string/array
+  // lmask contains 8B high bit mask for comparison
+  // i     contains next index to be processed (adr. inp+i is on 8B boundary)
+  bind(Lcore);
+  cmp_and_br_short(i, t6, Assembler::greater, Assembler::pn, Ltail);
+  bind(Lcore_rpt);
+  ldx(i, inp, t3);
+  andcc(t3, lmask, G0);
+  brx(Assembler::notZero, true, Assembler::pn, Lreturn);
+  delayed()->mov(1, result);    // annulled so i not clobbered for following
+  add(i, 8, i);
+  cmp_and_br_short(i, t6, Assembler::lessEqual, Assembler::pn, Lcore_rpt);
+  // ===========================================================
+
+  // ALIGNED TAIL (<8B)
+  // ===========================================================
+  // handle aligned tail of 7B or less as complete 8B, obliterating end of
+  // string bytes by shifting them off end, compare what's left with bitmask
+  // inp   contains address of first byte in string/array
+  // lmask contains 8B high bit mask for comparison
+  // i     contains next index to be processed (adr. inp+i is on 8B boundary)
+  bind(Ltail);
+  subcc(size, i, t4);   // # of remaining bytes in string -> t4
+  // return 0 if no more remaining bytes
+  br(Assembler::lessEqual, true, Assembler::pn, Lreturn);
+  delayed()->mov(0, result); // annulled: so i not clobbered for following load
+  ldx(inp, i, t6);      // load final 8B (aligned) containing tail -> t6
+  mov(8, t5);
+  sub(t5, t4, t3);
+  mov(0, result);       // ** i clobbered at this point
+  sll(t3, 3, t5);       // bits beyond end of string          -> t5
+  srlx(t6, t5, t3);     // bytes beyond end now obliterated   -> t3
+  andcc(lmask, t3, G0);
+  movcc(Assembler::notZero, false, xcc,  1, result);
+  bind(Lreturn);
+}
+
 #endif
+
+
 
 // Use BIS for zeroing (count is in bytes).
 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
   assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");