< prev index next >

src/cpu/sparc/vm/macroAssembler_sparc.cpp

Print this page




  27 #include "compiler/disassembler.hpp"
  28 #include "gc/shared/cardTableModRefBS.hpp"
  29 #include "gc/shared/collectedHeap.inline.hpp"
  30 #include "interpreter/interpreter.hpp"
  31 #include "memory/resourceArea.hpp"
  32 #include "memory/universe.hpp"
  33 #include "oops/klass.inline.hpp"
  34 #include "prims/methodHandles.hpp"
  35 #include "runtime/biasedLocking.hpp"
  36 #include "runtime/interfaceSupport.hpp"
  37 #include "runtime/objectMonitor.hpp"
  38 #include "runtime/os.inline.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "runtime/stubRoutines.hpp"
  41 #include "utilities/macros.hpp"
  42 #if INCLUDE_ALL_GCS
  43 #include "gc/g1/g1CollectedHeap.inline.hpp"
  44 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  45 #include "gc/g1/heapRegion.hpp"
  46 #endif // INCLUDE_ALL_GCS



  47 
  48 #ifdef PRODUCT
     // Product build: BLOCK_COMMENT expands to nothing and STOP only calls stop().
  49 #define BLOCK_COMMENT(str) /* nothing */
  50 #define STOP(error) stop(error)
  51 #else
     // Non-product build: BLOCK_COMMENT forwards to block_comment(), and STOP
     // calls block_comment() with the message before calling stop().
     // NOTE(review): STOP expands to two statements here, so using it as the
     // unbraced body of an if/else would only guard the block_comment() call.
  52 #define BLOCK_COMMENT(str) block_comment(str)
  53 #define STOP(error) block_comment(error); stop(error)
  54 #endif
  55 
  56 // Convert the raw encoding form into the form expected by the
  57 // constructor for Address.
  58 Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  59   assert(scale == 0, "not supported");
  60   RelocationHolder rspec;
  61   if (disp_reloc != relocInfo::none) {
  62     rspec = Relocation::spec_simple(disp_reloc);
  63   }
  64 
  65   Register rindex = as_Register(index);
  66   if (rindex != G0) {


4236       }
4237     } else {
4238       // shift/mov src into dst.
4239       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift() || Universe::narrow_klass_shift() == 0, "decode alg wrong");
4240       sllx(src, Universe::narrow_klass_shift(), dst);
4241     }
4242   }
4243 }
4244 
4245 void MacroAssembler::reinit_heapbase() {
     // Reloads G6_heapbase with the narrow pointer base. Nothing is emitted
     // unless compressed oops or compressed class pointers are in use.
4246   if (UseCompressedOops || UseCompressedClassPointers) {
4247     if (Universe::heap() != NULL) {
           // The heap already exists while this code is being generated, so the
           // current base value is handed to set() directly.
4248       set((intptr_t)Universe::narrow_ptrs_base(), G6_heapbase);
4249     } else {
           // No heap yet: go through the address of the base instead and emit
           // a load of its contents (presumably so the generated code picks up
           // whatever value is in place when it runs -- TODO confirm).
4250       AddressLiteral base(Universe::narrow_ptrs_base_addr());
4251       load_ptr_contents(base, G6_heapbase);
4252     }
4253   }
4254 }
4255 
4256 // Compare char[] arrays aligned to 4 bytes.
     //
     // ary1, ary2 - registers addressing the two arrays' data; the emitted code
     //              advances both by the (adjusted) byte count.
     // limit      - number of bytes to compare, non-zero (see the note in the
     //              body); clobbered.
     // result     - set to 0 before every mismatch exit to Ldone, and to 1 when
     //              the only thing to compare was a single matching trailing char.
     // chr1, chr2 - scratch registers; chr1 must be the same register as result.
     // Ldone      - caller-owned label, target of every early exit.
     //
     // If the word loop finishes without a mismatch the code falls through
     // without writing result; the caller has to set it (see the end of the body).
4257 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
4258                                         Register limit, Register result,
4259                                         Register chr1, Register chr2, Label& Ldone) {
4260   Label Lvector, Lloop;
4261   assert(chr1 == result, "should be the same");
4262
4263   // Note: limit contains number of bytes (2*char_elements) != 0.
4264   andcc(limit, 0x2, chr1); // trailing character ?
4265   br(Assembler::zero, false, Assembler::pt, Lvector);
4266   delayed()->nop();
4267
4268   // compare the trailing char
4269   sub(limit, sizeof(jchar), limit);
4270   lduh(ary1, limit, chr1);
4271   lduh(ary2, limit, chr2);
4272   cmp(chr1, chr2);
4273   br(Assembler::notEqual, true, Assembler::pt, Ldone);
4274   delayed()->mov(G0, result);     // not equal
4275
4276   // only one char ?
4277   cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
4278   delayed()->add(G0, 1, result); // nothing left to compare: arrays are equal
4279
4280   // word by word compare, don't need alignment check
4281   bind(Lvector);
4282   // Shift ary1 and ary2 to the end of the arrays, negate limit
       // (limit then runs from -byte_count up to zero and doubles as the loop test)
4283   add(ary1, limit, ary1);
4284   add(ary2, limit, ary2);
4285   neg(limit, limit);
4286
       // The first word of ary1 is loaded ahead of the loop; later ary1 loads
       // sit in the delay slot of the loop branch.
4287   lduw(ary1, limit, chr1);
4288   bind(Lloop);
4289   lduw(ary2, limit, chr2);
4290   cmp(chr1, chr2);
4291   br(Assembler::notEqual, true, Assembler::pt, Ldone);
4292   delayed()->mov(G0, result);     // not equal
       // advance by one 32-bit word (two chars); sets the condition codes
4293   inccc(limit, 2*sizeof(jchar));
4294   // annul LDUW if branch is not taken to prevent access past end of array
4295   br(Assembler::notZero, true, Assembler::pt, Lloop);
4296   delayed()->lduw(ary1, limit, chr1); // hoisted
4297
4298   // Caller should set it:
4299   // add(G0, 1, result); // equals
4300 }


4301 
4302 // Use BIS for zeroing (count is in bytes).
4303 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
4304   assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
4305   Register end = count;
4306   int cache_line_size = VM_Version::prefetch_data_size();
4307   // Minimum count when BIS zeroing can be used since
4308   // it needs membar which is expensive.
4309   int block_zero_size  = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit);
4310 
4311   Label small_loop;
4312   // Check if count is negative (dead code) or zero.
4313   // Note, count uses 64bit in 64 bit VM.
4314   cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone);
4315 
4316   // Use BIS zeroing only for big arrays since it requires membar.
4317   if (Assembler::is_simm13(block_zero_size)) { // < 4096
4318     cmp(count, block_zero_size);
4319   } else {
4320     set(block_zero_size, temp);




  27 #include "compiler/disassembler.hpp"
  28 #include "gc/shared/cardTableModRefBS.hpp"
  29 #include "gc/shared/collectedHeap.inline.hpp"
  30 #include "interpreter/interpreter.hpp"
  31 #include "memory/resourceArea.hpp"
  32 #include "memory/universe.hpp"
  33 #include "oops/klass.inline.hpp"
  34 #include "prims/methodHandles.hpp"
  35 #include "runtime/biasedLocking.hpp"
  36 #include "runtime/interfaceSupport.hpp"
  37 #include "runtime/objectMonitor.hpp"
  38 #include "runtime/os.inline.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "runtime/stubRoutines.hpp"
  41 #include "utilities/macros.hpp"
  42 #if INCLUDE_ALL_GCS
  43 #include "gc/g1/g1CollectedHeap.inline.hpp"
  44 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  45 #include "gc/g1/heapRegion.hpp"
  46 #endif // INCLUDE_ALL_GCS
  47 #ifdef COMPILER2
  48 #include "opto/intrinsicnode.hpp"
  49 #endif
  50 
  51 #ifdef PRODUCT
     // Product build: BLOCK_COMMENT expands to nothing and STOP only calls stop().
  52 #define BLOCK_COMMENT(str) /* nothing */
  53 #define STOP(error) stop(error)
  54 #else
     // Non-product build: BLOCK_COMMENT forwards to block_comment(), and STOP
     // calls block_comment() with the message before calling stop().
     // NOTE(review): STOP expands to two statements here, so using it as the
     // unbraced body of an if/else would only guard the block_comment() call.
  55 #define BLOCK_COMMENT(str) block_comment(str)
  56 #define STOP(error) block_comment(error); stop(error)
  57 #endif
  58 
  59 // Convert the raw encoding form into the form expected by the
  60 // constructor for Address.
  61 Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  62   assert(scale == 0, "not supported");
  63   RelocationHolder rspec;
  64   if (disp_reloc != relocInfo::none) {
  65     rspec = Relocation::spec_simple(disp_reloc);
  66   }
  67 
  68   Register rindex = as_Register(index);
  69   if (rindex != G0) {


4239       }
4240     } else {
4241       // shift/mov src into dst.
4242       assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift() || Universe::narrow_klass_shift() == 0, "decode alg wrong");
4243       sllx(src, Universe::narrow_klass_shift(), dst);
4244     }
4245   }
4246 }
4247 
4248 void MacroAssembler::reinit_heapbase() {
     // Reloads G6_heapbase with the narrow pointer base. Nothing is emitted
     // unless compressed oops or compressed class pointers are in use.
4249   if (UseCompressedOops || UseCompressedClassPointers) {
4250     if (Universe::heap() != NULL) {
           // The heap already exists while this code is being generated, so the
           // current base value is handed to set() directly.
4251       set((intptr_t)Universe::narrow_ptrs_base(), G6_heapbase);
4252     } else {
           // No heap yet: go through the address of the base instead and emit
           // a load of its contents (presumably so the generated code picks up
           // whatever value is in place when it runs -- TODO confirm).
4253       AddressLiteral base(Universe::narrow_ptrs_base_addr());
4254       load_ptr_contents(base, G6_heapbase);
4255     }
4256   }
4257 }
4258 
4259 #ifdef COMPILER2
4260 
4261 // Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
     //
     // Fast path only: it handles as many groups of 8 chars as possible and then
     // falls through (label Lslow at the end) so that the caller can deal with
     // the remaining chars. It also falls through without copying anything when
     // cnt < 8 or when src/dst are not both 8-byte aligned.
     //
     // src, dst    - source char[] / destination byte[] data addresses; advanced
     //               by 16 / 8 bytes per iteration.
     // cnt         - number of chars; decremented by 8 per iteration.
     // result      - receives a copy of cnt on entry; overwritten with 0 when a
     //               char with a non-zero high byte is found.
     // tmp1..tmp4  - integer scratch registers (tmp4 holds the masks).
     // ftmp1..3    - floating point scratch registers for the byte shuffle.
     // Ldone       - caller-owned label, taken only on failure (result == 0).
4262 void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result,
4263                                         Register tmp1, Register tmp2, Register tmp3, Register tmp4,
4264                                         FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) {
4265   Label Lloop, Lslow;
4266   assert(UseVIS >= 3, "VIS3 is required");
4267   assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result);
4268   assert_different_registers(ftmp1, ftmp2, ftmp3);
4269
4270   // Check if cnt >= 8 (= 16 bytes)
4271   cmp(cnt, 8);
4272   br(Assembler::less, false, Assembler::pn, Lslow);
       // not annulled, so result is initialized on both paths
4273   delayed()->mov(cnt, result); // copy count
4274
4275   // Check for 8-byte alignment of src and dst
4276   or3(src, dst, tmp1);
4277   andcc(tmp1, 7, G0);
4278   br(Assembler::notZero, false, Assembler::pn, Lslow);
4279   delayed()->nop();
4280
4281   // Set mask for bshuffle instruction
       // (the nibbles 1,3,5,...,f presumably select the odd-numbered bytes of the
       // 16 source bytes, i.e. the low byte of each char -- TODO confirm against
       // the VIS bmask/bshuffle definition)
4282   Register mask = tmp4;
4283   set(0x13579bdf, mask);
4284   bmask(mask, G0, G0);
4285
4286   // Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters
4287   Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00
4288   add(mask, 0x300, mask);             // mask = 0x0000 0000 ff00 ff00
4289   sllx(mask, 32, tmp1);               // tmp1 = 0xff00 ff00 0000 0000
4290   or3(mask, tmp1, mask);              // mask = 0xff00 ff00 ff00 ff00
4291
4292   // Load first 8 bytes
4293   ldx(src, 0, tmp1);
4294
4295   bind(Lloop);
4296   // Load next 8 bytes
4297   ldx(src, 8, tmp2);
4298
4299   // Check for non-latin1 character by testing if the most significant byte of a char is set.
4300   // Although we have to move the data between integer and floating point registers, this is
4301   // still faster than the corresponding VIS instructions (ford/fand/fcmpd).
4302   or3(tmp1, tmp2, tmp3);
4303   btst(tmp3, mask);
4304   // annul zeroing if branch is not taken to preserve original count
4305   brx(Assembler::notZero, true, Assembler::pn, Ldone);
4306   delayed()->mov(G0, result); // 0 - failed
4307
4308   // Move bytes into float register
4309   movxtod(tmp1, ftmp1);
4310   movxtod(tmp2, ftmp2);
4311
4312   // Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3
4313   bshuffle(ftmp1, ftmp2, ftmp3);
4314   stf(FloatRegisterImpl::D, ftmp3, dst, 0);
4315
4316   // Increment addresses and decrement count
4317   inc(src, 16);
4318   inc(dst, 8);
4319   dec(cnt, 8);
4320
4321   cmp(cnt, 8);
4322   // annul LDX if branch is not taken to prevent access past end of string
4323   br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
4324   delayed()->ldx(src, 0, tmp1);
4325
4326   // Fallback to slow version
       // (nothing is emitted here; the caller's code follows directly)
4327   bind(Lslow);
4328 }
4329 
4330 // Compress char[] to byte[]. Return 0 on failure.
     //
     // Copies one char per iteration, storing only its low byte.
     //
     // src, dst - source char[] / destination byte[] data addresses; advanced.
     // cnt      - number of chars to process; counted down to zero.
     //            NOTE(review): the first char is loaded and cnt is decremented
     //            before any zero test, so callers presumably guarantee
     //            cnt != 0 -- confirm at the call sites.
     // result   - only written on failure (set to 0); the success value has to
     //            be in place before this code runs.
     // tmp      - scratch register holding the current char.
     // Ldone    - caller-owned label, taken when a char > 0xff is found.
4331 void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) {
4332   Label Lloop;
4333   assert_different_registers(src, dst, cnt, tmp, result);
4334
       // First char is loaded ahead of the loop; later loads sit in the delay
       // slot of the loop branch.
4335   lduh(src, 0, tmp);
4336
4337   bind(Lloop);
4338   inc(src, sizeof(jchar));
4339   cmp(tmp, 0xff);
4340   // annul zeroing if branch is not taken to preserve original count
4341   br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc
4342   delayed()->mov(G0, result); // 0 - failed
       // deccc sets the condition codes tested by the loop branch below
4343   deccc(cnt);
4344   stb(tmp, dst, 0);
4345   inc(dst);
4346   // annul LDUH if branch is not taken to prevent access past end of string
4347   br(Assembler::notZero, true, Assembler::pt, Lloop);
4348   delayed()->lduh(src, 0, tmp); // hoisted
4349 }
4350 
4351 // Inflate byte[] to char[] by inflating 16 bytes at once.
     //
     // Fast path only: each iteration reads 8 source bytes and writes 16
     // destination bytes, then the code falls through (label Lslow at the end)
     // so that the caller can handle the remaining elements. It also falls
     // through without copying when cnt < 8 or src/dst are not both 8-byte aligned.
     //
     // src, dst  - source byte[] / destination char[] data addresses; advanced
     //             by 8 / 16 bytes per iteration.
     // cnt       - number of elements; decremented by 8 per iteration.
     // tmp       - integer scratch register (alignment check).
     // ftmp1..4  - floating point scratch registers; ftmp4 is zeroed and used
     //             as the zero source for fpmerge.
     // Ldone     - not referenced by this function.
4352 void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
4353                                        FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) {
4354   Label Lloop, Lslow;
4355   assert(UseVIS >= 3, "VIS3 is required");
4356   assert_different_registers(src, dst, cnt, tmp);
4357   assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4);
4358
4359   // Check if cnt >= 8 (= 16 bytes)
4360   cmp(cnt, 8);
4361   br(Assembler::less, false, Assembler::pn, Lslow);
4362   delayed()->nop();
4363
4364   // Check for 8-byte alignment of src and dst
4365   or3(src, dst, tmp);
4366   andcc(tmp, 7, G0);
4367   br(Assembler::notZero, false, Assembler::pn, Lslow);
4368   // Initialize float register to zero
       // (placed in the delay slot of the non-annulled branch above)
4369   FloatRegister zerof = ftmp4;
4370   delayed()->fzero(FloatRegisterImpl::D, zerof);
4371
4372   // Load first 8 bytes
4373   ldf(FloatRegisterImpl::D, src, 0, ftmp1);
4374
4375   bind(Lloop);
4376   inc(src, 8);
4377   dec(cnt, 8);
4378
4379   // Inflate the string by interleaving each byte from the source array
4380   // with a zero byte and storing the result in the destination array.
       // ftmp1->successor() is merged into the upper 8 destination bytes (dst+8)
       // and ftmp1 itself into the lower 8 (dst+0); presumably these are the two
       // single-precision halves of the 8 bytes just loaded -- TODO confirm.
4381   fpmerge(zerof, ftmp1->successor(), ftmp2);
4382   stf(FloatRegisterImpl::D, ftmp2, dst, 8);
4383   fpmerge(zerof, ftmp1, ftmp3);
4384   stf(FloatRegisterImpl::D, ftmp3, dst, 0);
4385
4386   inc(dst, 16);
4387
4388   cmp(cnt, 8);
4389   // annul LDF if branch is not taken to prevent access past end of string
4390   br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
4391   delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1);
4392
4393   // Fallback to slow version
       // (nothing is emitted here; the caller's code follows directly)
4394   bind(Lslow);
4395 }
4396 
4397 // Inflate byte[] to char[].
     //
     // Copies one element per iteration: an unsigned byte is loaded from src and
     // stored as a 16-bit value at dst.
     //
     // src, dst - source byte[] / destination char[] data addresses; advanced.
     // cnt      - number of elements; counted down to zero.
     //            NOTE(review): the first byte is loaded and cnt is decremented
     //            before any zero test, so callers presumably guarantee
     //            cnt != 0 -- confirm at the call sites.
     // tmp      - scratch register holding the current element.
     // Ldone    - not referenced by this function.
4398 void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) {
4399   Label Loop;
4400   assert_different_registers(src, dst, cnt, tmp);
4401
       // First byte is loaded ahead of the loop; later loads sit in the delay
       // slot of the loop branch.
4402   ldub(src, 0, tmp);
4403   bind(Loop);
4404   inc(src);
       // deccc sets the condition codes tested by the loop branch below
4405   deccc(cnt);
4406   sth(tmp, dst, 0);
4407   inc(dst, sizeof(jchar));
4408   // annul LDUB if branch is not taken to prevent access past end of string
4409   br(Assembler::notZero, true, Assembler::pt, Loop);
4410   delayed()->ldub(src, 0, tmp); // hoisted
4411 }
4412 
4413 void MacroAssembler::string_compare(Register str1, Register str2,
4414                                     Register cnt1, Register cnt2,
4415                                     Register tmp1, Register tmp2,
4416                                     Register result, int ae) {
     // Emits a compareTo-style comparison of two strings.
     //
     // str1, str2 - addresses of the string data; clobbered.
     // cnt1, cnt2 - lengths of the two strings; clobbered. For LU/UL cnt2 is
     //              halved first; for UU the length difference is halved at the
     //              end "to get number of chars".
     // tmp1       - scratch register; holds the length difference (diff).
     // tmp2       - scratch register; only used (as second limit) for LU/UL.
     // result     - difference between the first mismatched chars, or the
     //              length difference if the strings agree up to the shorter
     //              length; negated at the end for UL.
     // ae         - argument encoding, one of StrIntrinsicNode::LL/UU/LU/UL.
4417   Label Ldone, Lloop;
4418   assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result);
4419   int stride1, stride2;
4420
4421   // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
4422   // we interchange str1 and str2 in the UL case and negate the result.
4423   // Like this, str1 is always latin1 encoded, except for the UU case.
4424
4425   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
4426     srl(cnt2, 1, cnt2);
4427   }
4428
4429   // See if the lengths are different, and calculate min in cnt1.
4430   // Save diff in case we need it for a tie-breaker.
4431   Label Lskip;
4432   Register diff = tmp1;
4433   subcc(cnt1, cnt2, diff);
4434   br(Assembler::greater, true, Assembler::pt, Lskip);
4435   // cnt2 is shorter, so use its count:
4436   delayed()->mov(cnt2, cnt1);
4437   bind(Lskip);
4438
4439   // Rename registers
       // (chr1 shares the result register and chr2 reuses cnt2, whose value is
       // no longer needed once the minimum is in cnt1)
4440   Register limit1 = cnt1;
4441   Register limit2 = limit1;
4442   Register chr1   = result;
4443   Register chr2   = cnt2;
4444   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
4445     // We need an additional register to keep track of two limits
4446     assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
4447     limit2 = tmp2;
4448   }
4449
4450   // Is the minimum length zero?
4451   cmp(limit1, (int)0); // use cast to resolve overloading ambiguity
4452   br(Assembler::equal, true, Assembler::pn, Ldone);
4453   // result is difference in lengths
4454   if (ae == StrIntrinsicNode::UU) {
4455     delayed()->sra(diff, 1, result);  // Divide by 2 to get number of chars
4456   } else {
4457     delayed()->mov(diff, result);
4458   }
4459
4460   // Load first characters
       // (this also fixes the per-element strides used for the rest of the code)
4461   if (ae == StrIntrinsicNode::LL) {
4462     stride1 = stride2 = sizeof(jbyte);
4463     ldub(str1, 0, chr1);
4464     ldub(str2, 0, chr2);
4465   } else if (ae == StrIntrinsicNode::UU) {
4466     stride1 = stride2 = sizeof(jchar);
4467     lduh(str1, 0, chr1);
4468     lduh(str2, 0, chr2);
4469   } else {
4470     stride1 = sizeof(jbyte);
4471     stride2 = sizeof(jchar);
4472     ldub(str1, 0, chr1);
4473     lduh(str2, 0, chr2);
4474   }
4475
4476   // Compare first characters
       // (the difference lands in chr1, i.e. directly in result)
4477   subcc(chr1, chr2, chr1);
4478   br(Assembler::notZero, false, Assembler::pt, Ldone);
4479   assert(chr1 == result, "result must be pre-placed");
4480   delayed()->nop();
4481
4482   // Check if the strings start at same location
4483   cmp(str1, str2);
4484   brx(Assembler::equal, true, Assembler::pn, Ldone);
4485   delayed()->mov(G0, result);  // result is zero
4486
4487   // We have no guarantee that on 64 bit the higher half of limit is 0
4488   signx(limit1);
4489
4490   // Get limit
       // (chr1/chr2 temporarily hold limit-stride; the first element was
       // already compared above)
4491   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
4492     sll(limit1, 1, limit2);
4493     subcc(limit2, stride2, chr2);
4494   }
4495   subcc(limit1, stride1, chr1);
4496   br(Assembler::zero, true, Assembler::pn, Ldone);
4497   // result is difference in lengths
4498   if (ae == StrIntrinsicNode::UU) {
4499     delayed()->sra(diff, 1, result);  // Divide by 2 to get number of chars
4500   } else {
4501     delayed()->mov(diff, result);
4502   }
4503
4504   // Shift str1 and str2 to the end of the arrays, negate limit
4505   add(str1, limit1, str1);
4506   add(str2, limit2, str2);
4507   neg(chr1, limit1);  // limit1 = -(limit1-stride1)
4508   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
4509     neg(chr2, limit2);  // limit2 = -(limit2-stride2)
4510   }
4511
4512   // Compare the rest of the characters
4513   if (ae == StrIntrinsicNode::UU) {
4514     lduh(str1, limit1, chr1);
4515   } else {
4516     ldub(str1, limit1, chr1);
4517   }
4518
4519   bind(Lloop);
4520   if (ae == StrIntrinsicNode::LL) {
4521     ldub(str2, limit2, chr2);
4522   } else {
4523     lduh(str2, limit2, chr2);
4524   }
4525
4526   subcc(chr1, chr2, chr1);
4527   br(Assembler::notZero, false, Assembler::pt, Ldone);
4528   assert(chr1 == result, "result must be pre-placed");
       // The index increment sits in the (non-annulled) delay slot; its condition
       // codes, or those of the limit2 increment for LU/UL, drive the loop branch.
4529   delayed()->inccc(limit1, stride1);
4530   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
4531     inccc(limit2, stride2);
4532   }
4533
4534   // annul the load (LDUH/LDUB) if branch is not taken to prevent access past end of string
4535   br(Assembler::notZero, true, Assembler::pt, Lloop);
4536   if (ae == StrIntrinsicNode::UU) {
         // limit2 is the same register as limit1 in the UU case (it is only
         // redirected to tmp2 for LU/UL), so this matches the initial load above.
4537     delayed()->lduh(str1, limit2, chr1);
4538   } else {
4539     delayed()->ldub(str1, limit1, chr1);
4540   }
4541
4542   // If strings are equal up to min length, return the length difference.
4543   if (ae == StrIntrinsicNode::UU) {
4544     // Divide by 2 to get number of chars
4545     sra(diff, 1, result);
4546   } else {
4547     mov(diff, result);
4548   }
4549
4550   // Otherwise, return the difference between the first mismatched chars.
4551   bind(Ldone);
4552   if(ae == StrIntrinsicNode::UL) {
4553     // Negate result (see note above)
4554     neg(result);
4555   }
4556 }
4557 
4558 void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
4559                                   Register limit, Register tmp, Register result, bool is_byte) {
     // Emits an equality test of two arrays; result is 1 if equal, 0 otherwise.
     //
     // is_array_equ - true: ary1/ary2 are treated as array references; identity,
     //                null and length checks are emitted, limit is loaded from
     //                the array length and the element base offset is added.
     //                false: ary1/ary2 and limit are used as passed in (limit is
     //                only sign-extended).
     // ary1, ary2   - the two arrays; clobbered.
     // limit        - length register; clobbered. From the trailing-char check
     //                on it is treated as a byte count. NOTE(review): with
     //                !is_array_equ && !is_byte no scaling is applied, so the
     //                caller presumably passes bytes already -- confirm.
     // tmp          - scratch register.
     // result       - receives the outcome; also used as scratch for loads.
     // is_byte      - true for byte elements (adds the odd trailing-byte step),
     //                false for char elements.
4560   Label Ldone, Lvector, Lloop;
4561   assert_different_registers(ary1, ary2, limit, tmp, result);
4562
4563   int length_offset  = arrayOopDesc::length_offset_in_bytes();
4564   int base_offset    = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
4565
4566   if (is_array_equ) {
4567     // return true if the same array
4568     cmp(ary1, ary2);
4569     brx(Assembler::equal, true, Assembler::pn, Ldone);
4570     delayed()->add(G0, 1, result); // equal
4571
4572     br_null(ary1, true, Assembler::pn, Ldone);
4573     delayed()->mov(G0, result);    // not equal
4574
4575     br_null(ary2, true, Assembler::pn, Ldone);
4576     delayed()->mov(G0, result);    // not equal
4577
4578     // load the lengths of arrays
4579     ld(Address(ary1, length_offset), limit);
4580     ld(Address(ary2, length_offset), tmp);
4581
4582     // return false if the two arrays are not equal length
4583     cmp(limit, tmp);
4584     br(Assembler::notEqual, true, Assembler::pn, Ldone);
4585     delayed()->mov(G0, result);    // not equal
4586   }
4587
4588   cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
4589   delayed()->add(G0, 1, result); // zero-length arrays are equal
4590
4591   if (is_array_equ) {
4592     // load array addresses
4593     add(ary1, base_offset, ary1);
4594     add(ary2, base_offset, ary2);
4595   } else {
4596     // We have no guarantee that on 64 bit the higher half of limit is 0
4597     signx(limit);
4598   }
4599
4600   if (is_byte) {
4601     Label Lskip;
4602     // check for trailing byte
4603     andcc(limit, 0x1, tmp);
4604     br(Assembler::zero, false, Assembler::pt, Lskip);
4605     delayed()->nop();
4606
4607     // compare the trailing byte
4608     sub(limit, sizeof(jbyte), limit);
4609     ldub(ary1, limit, result);
4610     ldub(ary2, limit, tmp);
4611     cmp(result, tmp);
4612     br(Assembler::notEqual, true, Assembler::pt, Ldone);
4613     delayed()->mov(G0, result);    // not equal
4614
4615     // only one byte?
4616     cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
4617     delayed()->add(G0, 1, result); // nothing left to compare: arrays are equal
4618     bind(Lskip);
4619   } else if (is_array_equ) {
4620     // set byte count
4621     sll(limit, exact_log2(sizeof(jchar)), limit);
4622   }
4623
4624   // check for trailing character
4625   andcc(limit, 0x2, tmp);
4626   br(Assembler::zero, false, Assembler::pt, Lvector);
4627   delayed()->nop();
4628
4629   // compare the trailing char
4630   sub(limit, sizeof(jchar), limit);
4631   lduh(ary1, limit, result);
4632   lduh(ary2, limit, tmp);
4633   cmp(result, tmp);
4634   br(Assembler::notEqual, true, Assembler::pt, Ldone);
4635   delayed()->mov(G0, result);     // not equal
4636
4637   // only one char?
4638   cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
4639   delayed()->add(G0, 1, result); // nothing left to compare: arrays are equal
4640
4641   // word by word compare, don't need alignment check
4642   bind(Lvector);
4643   // Shift ary1 and ary2 to the end of the arrays, negate limit
       // (limit then runs from -byte_count up to zero and doubles as the loop test)
4644   add(ary1, limit, ary1);
4645   add(ary2, limit, ary2);
4646   neg(limit, limit);
4647
       // The first word of ary1 is loaded ahead of the loop; later ary1 loads
       // sit in the delay slot of the loop branch.
4648   lduw(ary1, limit, result);
4649   bind(Lloop);
4650   lduw(ary2, limit, tmp);
4651   cmp(result, tmp);
4652   br(Assembler::notEqual, true, Assembler::pt, Ldone);
4653   delayed()->mov(G0, result);     // not equal
       // advance by one 32-bit word; sets the condition codes for the branch
4654   inccc(limit, 2*sizeof(jchar));
4655   // annul LDUW if branch is not taken to prevent access past end of array
4656   br(Assembler::notZero, true, Assembler::pt, Lloop);
4657   delayed()->lduw(ary1, limit, result); // hoisted
4658
4659   add(G0, 1, result); // equals
4660   bind(Ldone);
4661 }
4662 
4663 #endif
4664 
4665 // Use BIS for zeroing (count is in bytes).
4666 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
4667   assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
4668   Register end = count;
4669   int cache_line_size = VM_Version::prefetch_data_size();
4670   // Minimum count when BIS zeroing can be used since
4671   // it needs membar which is expensive.
4672   int block_zero_size  = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit);
4673 
4674   Label small_loop;
4675   // Check if count is negative (dead code) or zero.
4676   // Note, count uses 64bit in 64 bit VM.
4677   cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone);
4678 
4679   // Use BIS zeroing only for big arrays since it requires membar.
4680   if (Assembler::is_simm13(block_zero_size)) { // < 4096
4681     cmp(count, block_zero_size);
4682   } else {
4683     set(block_zero_size, temp);


< prev index next >