< prev index next >

src/cpu/sparc/vm/macroAssembler_sparc.cpp

Print this page

        

@@ -4664,12 +4664,110 @@
 
   add(G0, 1, result); // equals
   bind(Ldone);
 }
 
+// Emit code that tests whether any byte of the 'size' bytes starting at
+// address 'inp' has its high bit set (i.e. is negative as a signed byte).
+//
+//   inp    - address of the first byte to examine (any alignment)
+//   size   - number of bytes to examine
+//   result - receives 1 if a negative byte is found, 0 otherwise; it doubles
+//            as the running byte index 'i' until the very end
+//   t2..t6 - scratch registers, overwritten (t2 holds the high-bit mask)
+//
+// The emitted code works on aligned 8-byte words: an optional unaligned head,
+// a loop over the fully contained aligned words, and an optional tail of
+// fewer than 8 bytes. Head and tail load the whole aligned word that contains
+// them and shift the bytes that lie outside the string off the register ends
+// before comparing against the mask 0x8080808080808080.
+void MacroAssembler::has_negatives(Register inp, Register size, Register result, Register t2, Register t3, Register t4, Register t5, Register t6) {
+
+  // test for negative bytes in input string of a given size
+  // result 1 if found, 0 otherwise.
+
+  Label Lcore, Ltail, Lreturn, Lcore_rpt;
+
+  assert_different_registers(inp, size, t2, t3, t4, t5, t6, result);
+
+  Register i     = result;  // result used as integer index i until very end
+  Register lmask = t2;      // t2 is aliased to lmask
+
+  // INITIALIZATION
+  // ===========================================================
+  // initialize highbits mask -> lmask = 0x8080808080808080  (8B/64b)
+  // compute unaligned offset -> i
+  // compute core end index   -> t6
+  Assembler::sethi(0x80808000, t3);   //! sethi macro fails to emit optimal
+  add(t3, 0x80, t2);                  // 0x80808080 -> t2
+  sllx(t2, 32, t5);                   // 0x8080808000000000 -> t5
+  or3(t5, t2, lmask);                 // 0x8080808080808080 -> lmask
+  andcc(inp, 0x7, i);                 // unaligned offset -> i
+  br(Assembler::zero, true, Assembler::pn, Lcore); // starts 8B aligned?
+  delayed()->add(size, -8, t6);       // (annulled) core end index -> t6
+
+  // ===========================================================
+
+  // UNALIGNED HEAD
+  // ===========================================================
+  // * unaligned head handling: grab aligned 8B containing unaligned inp(ut)
+  // * obliterate (ignore) bytes outside string by shifting off reg ends
+  // * compare with bitmask, short circuit return true if one or more high
+  //   bits set.
+  //
+  // An empty string must be rejected before the shifts below: with size == 0
+  // the left-shift count works out to 64 bits, and sllx only honours the low
+  // 6 bits of its count, so the bytes in front of the string would survive
+  // and could produce a false positive.
+  cmp(size, 0);
+  br(Assembler::zero, true, Assembler::pn, Lreturn); // nothing to examine?
+  delayed()->mov(0, result);     // annulled so i not clobbered for following
+  neg(i, t3);
+  add(i, size, t5);              // offset of first byte past string -> t5
+  sra(t3, 0, t4);                // sign-extended -i -> t4
+  ldx(inp, t4, t3);  // raw aligned 8B containing unaligned head -> t3
+  mov(8, t4);
+  sub(t4, t5, t4);               // 8 - (i + size) -> t4 (may be negative)
+  sra(t4, 31, t6);               // all ones if t4 negative, else 0 -> t6
+  andn(t4, t6, t5);              // max(t4, 0): # trailing bytes to drop -> t5
+  add(i, t5, t4);                // leading + trailing bytes to drop -> t4
+  sll(t5, 3, t5);
+  sll(t4, 3, t6);   // # bits to shift right, left -> t5,t6
+  srlx(t3, t5, t4);
+  sllx(t4, t6, t3); // bytes outside string in 8B header obliterated -> t3
+  andcc(lmask, t3, G0);
+  brx(Assembler::notZero, true, Assembler::pn, Lreturn); // short circuit?
+  delayed()->mov(1,result);      // annulled so i not clobbered for following
+  add(size, -8, t6);             // core end index -> t6
+  mov(8, t4);
+  sub(t4, i, i);                 // # bytes examined in unaligned head (<8) -> i
+  // ===========================================================
+
+  // ALIGNED CORE
+  // ===========================================================
+  // * iterate index i over aligned 8B sections of core, comparing with
+  //   bitmask, short circuit return true if one or more high bits set
+  // t6 contains core end index/loop limit which is the index
+  //     of the MSB of last (unaligned) 8B fully contained in the string.
+  // inp   contains address of first byte in string/array
+  // lmask contains 8B high bit mask for comparison
+  // i     contains next index to be processed (adr. inp+i is on 8B boundary)
+  bind(Lcore);
+  cmp_and_br_short(i, t6, Assembler::greater, Assembler::pn, Ltail);
+  bind(Lcore_rpt);
+  ldx(i, inp, t3);               // next aligned 8B of the string -> t3
+  andcc(t3, lmask, G0);
+  brx(Assembler::notZero, true, Assembler::pn, Lreturn);
+  delayed()->mov(1, result);     // annulled so i survives when looping on
+  add(i, 8, i);
+  cmp_and_br_short(i, t6, Assembler::lessEqual, Assembler::pn, Lcore_rpt);
+  // ===========================================================
+
+  // ALIGNED TAIL (<8B)
+  // ===========================================================
+  // handle aligned tail of 7B or less as complete 8B, obliterating end of
+  // string bytes by shifting them off end, compare what's left with bitmask
+  // inp   contains address of first byte in string/array
+  // lmask contains 8B high bit mask for comparison
+  // i     contains next index to be processed (adr. inp+i is on 8B boundary)
+  bind(Ltail);
+  subcc(size, i, t4);   // # of remaining bytes in string -> t4
+  // return 0 if no more remaining bytes
+  br(Assembler::lessEqual, true, Assembler::pn, Lreturn);
+  delayed()->mov(0, result); // annulled: so i not clobbered for following load
+  ldx(inp, i, t6);       // load final 8B (aligned) containing tail -> t6
+  mov(8, t5);
+  sub(t5, t4, t3);       // # bytes beyond end of string       -> t3
+  mov(0, result);        // ** i clobbered at this point
+  sll(t3, 3, t5);        // bits beyond end of string          -> t5
+  srlx(t6, t5, t3);      // bytes beyond end now obliterated   -> t3
+  andcc(lmask, t3, G0);
+  movcc(Assembler::notZero, false, xcc,  1, result);
+  bind(Lreturn);
+}
+
 #endif
 
+
 // Use BIS for zeroing (count is in bytes).
 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
   assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
   Register end = count;
   int cache_line_size = VM_Version::prefetch_data_size();
< prev index next >