src/cpu/x86/vm/macroAssembler_x86.cpp
8033805 Sdiff src/cpu/x86/vm

  81 };
  82 
  83 
  84 // Implementation of MacroAssembler
  85 
  86 // First all the versions that have distinct versions depending on 32/64 bit
  87 // Unless the difference is trivial (1 line or so).
  88 
  89 #ifndef _LP64
  90 
  91 // 32bit versions
  92 
  93 Address MacroAssembler::as_Address(AddressLiteral adr) {
  94   return Address(adr.target(), adr.rspec());
  95 }
  96 
  97 Address MacroAssembler::as_Address(ArrayAddress adr) {
  98   return Address::make_array(adr);
  99 }
 100 
 101 int MacroAssembler::biased_locking_enter(Register lock_reg,
 102                                          Register obj_reg,
 103                                          Register swap_reg,
 104                                          Register tmp_reg,
 105                                          bool swap_reg_contains_mark,
 106                                          Label& done,
 107                                          Label* slow_case,
 108                                          BiasedLockingCounters* counters) {
 109   assert(UseBiasedLocking, "why call this otherwise?");
 110   assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
 111   assert_different_registers(lock_reg, obj_reg, swap_reg);
 112 
 113   if (PrintBiasedLockingStatistics && counters == NULL)
 114     counters = BiasedLocking::counters();
 115 
 116   bool need_tmp_reg = false;
 117   if (tmp_reg == noreg) {
 118     need_tmp_reg = true;
 119     tmp_reg = lock_reg;
 120   } else {
 121     assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
 122   }
 123   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
 124   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
 125   Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
 126   Address saved_mark_addr(lock_reg, 0);
 127 
 128   // Biased locking
 129   // See whether the lock is currently biased toward our thread and
 130   // whether the epoch is still valid
 131   // Note that the runtime guarantees sufficient alignment of JavaThread
 132   // pointers to allow age to be placed into low bits
 133   // First check to see whether biasing is even enabled for this object
 134   Label cas_label;
 135   int null_check_offset = -1;
 136   if (!swap_reg_contains_mark) {
 137     null_check_offset = offset();
 138     movl(swap_reg, mark_addr);
 139   }
 140   if (need_tmp_reg) {
 141     push(tmp_reg);
 142   }
 143   movl(tmp_reg, swap_reg);
 144   andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
 145   cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
 146   if (need_tmp_reg) {
 147     pop(tmp_reg);
 148   }
 149   jcc(Assembler::notEqual, cas_label);
 150   // The bias pattern is present in the object's header. Need to check
 151   // whether the bias owner and the epoch are both still current.
 152   // Note that because there is no current thread register on x86 we
 153   // need to store off the mark word we read out of the object to
 154   // avoid reloading it and needing to recheck invariants below. This
 155   // store is unfortunate but it makes the overall code shorter and
 156   // simpler.
 157   movl(saved_mark_addr, swap_reg);
 158   if (need_tmp_reg) {
 159     push(tmp_reg);
 160   }
 161   get_thread(tmp_reg);
 162   xorl(swap_reg, tmp_reg);
 163   if (swap_reg_contains_mark) {
 164     null_check_offset = offset();
 165   }
 166   movl(tmp_reg, klass_addr);
 167   xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
 168   andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
 169   if (need_tmp_reg) {
 170     pop(tmp_reg);
 171   }
 172   if (counters != NULL) {
 173     cond_inc32(Assembler::zero,
 174                ExternalAddress((address)counters->biased_lock_entry_count_addr()));
 175   }
 176   jcc(Assembler::equal, done);
 177 
 178   Label try_revoke_bias;
 179   Label try_rebias;
 180 
 181   // At this point we know that the header has the bias pattern and
 182   // that we are not the bias owner in the current epoch. We need to
 183   // figure out more details about the state of the header in order to
 184   // know what operations can be legally performed on the object's
 185   // header.
 186 
 187   // If the low three bits in the xor result aren't clear, that means
 188   // the prototype header is no longer biased and we have to revoke
 189   // the bias on this object.
 190   testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
 191   jcc(Assembler::notZero, try_revoke_bias);
 192 
 193   // Biasing is still enabled for this data type. See whether the
 194   // epoch of the current bias is still valid, meaning that the epoch
 195   // bits of the mark word are equal to the epoch bits of the
 196   // prototype header. (Note that the prototype header's epoch bits
 197   // only change at a safepoint.) If not, attempt to rebias the object
 198   // toward the current thread. Note that we must be absolutely sure
 199   // that the current epoch is invalid in order to do this because
 200   // otherwise the manipulations it performs on the mark word are
 201   // illegal.
 202   testl(swap_reg, markOopDesc::epoch_mask_in_place);
 203   jcc(Assembler::notZero, try_rebias);
 204 
 205   // The epoch of the current bias is still valid but we know nothing
 206   // about the owner; it might be set or it might be clear. Try to
 207   // acquire the bias of the object using an atomic operation. If this
 208   // fails we will go in to the runtime to revoke the object's bias.
 209   // Note that we first construct the presumed unbiased header so we
 210   // don't accidentally blow away another thread's valid bias.
 211   movl(swap_reg, saved_mark_addr);
 212   andl(swap_reg,
 213        markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
 214   if (need_tmp_reg) {
 215     push(tmp_reg);
 216   }
 217   get_thread(tmp_reg);
 218   orl(tmp_reg, swap_reg);
 219   if (os::is_MP()) {
 220     lock();
 221   }
 222   cmpxchgptr(tmp_reg, Address(obj_reg, 0));
 223   if (need_tmp_reg) {
 224     pop(tmp_reg);
 225   }
 226   // If the biasing toward our thread failed, this means that
 227   // another thread succeeded in biasing it toward itself and we
 228   // need to revoke that bias. The revocation will occur in the
 229   // interpreter runtime in the slow case.
 230   if (counters != NULL) {
 231     cond_inc32(Assembler::zero,
 232                ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
 233   }
 234   if (slow_case != NULL) {
 235     jcc(Assembler::notZero, *slow_case);
 236   }
 237   jmp(done);
 238 
 239   bind(try_rebias);
 240   // At this point we know the epoch has expired, meaning that the
 241   // current "bias owner", if any, is actually invalid. Under these
 242   // circumstances _only_, we are allowed to use the current header's
 243   // value as the comparison value when doing the cas to acquire the
 244   // bias in the current epoch. In other words, we allow transfer of
 245   // the bias from one thread to another directly in this situation.
 246   //
 247   // FIXME: due to a lack of registers we currently blow away the age
 248   // bits in this situation. Should attempt to preserve them.
 249   if (need_tmp_reg) {
 250     push(tmp_reg);
 251   }
 252   get_thread(tmp_reg);
 253   movl(swap_reg, klass_addr);
 254   orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
 255   movl(swap_reg, saved_mark_addr);
 256   if (os::is_MP()) {
 257     lock();
 258   }
 259   cmpxchgptr(tmp_reg, Address(obj_reg, 0));
 260   if (need_tmp_reg) {
 261     pop(tmp_reg);
 262   }
 263   // If the biasing toward our thread failed, then another thread
 264   // succeeded in biasing it toward itself and we need to revoke that
 265   // bias. The revocation will occur in the runtime in the slow case.
 266   if (counters != NULL) {
 267     cond_inc32(Assembler::zero,
 268                ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
 269   }
 270   if (slow_case != NULL) {
 271     jcc(Assembler::notZero, *slow_case);
 272   }
 273   jmp(done);
 274 
 275   bind(try_revoke_bias);
 276   // The prototype mark in the klass doesn't have the bias bit set any
 277   // more, indicating that objects of this data type are not supposed
 278   // to be biased any more. We are going to try to reset the mark of
 279   // this object to the prototype value and fall through to the
 280   // CAS-based locking scheme. Note that if our CAS fails, it means
 281   // that another thread raced us for the privilege of revoking the
 282   // bias of this particular object, so it's okay to continue in the
 283   // normal locking code.
 284   //
 285   // FIXME: due to a lack of registers we currently blow away the age
 286   // bits in this situation. Should attempt to preserve them.
 287   movl(swap_reg, saved_mark_addr);
 288   if (need_tmp_reg) {
 289     push(tmp_reg);
 290   }
 291   movl(tmp_reg, klass_addr);
 292   movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
 293   if (os::is_MP()) {
 294     lock();
 295   }
 296   cmpxchgptr(tmp_reg, Address(obj_reg, 0));
 297   if (need_tmp_reg) {
 298     pop(tmp_reg);
 299   }
 300   // Fall through to the normal CAS-based lock, because no matter what
 301   // the result of the above CAS, some thread must have succeeded in
 302   // removing the bias bit from the object's header.
 303   if (counters != NULL) {
 304     cond_inc32(Assembler::zero,
 305                ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
 306   }
 307 
 308   bind(cas_label);
 309 
 310   return null_check_offset;
 311 }
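The xorl/andl sequence above folds two questions, whether the bias owner is the current thread and whether the bias epoch is still current, into a single comparison against zero. The following standalone C++ sketch spells that test out; the mask values are illustrative stand-ins for the markOopDesc constants used by the assembly, not the authoritative definitions.

#include <cstdint>

// Illustrative bit masks only; the real values are the markOopDesc constants
// referenced in the code above.
const uintptr_t biased_lock_mask_in_place = 0x7;   // biased_lock bit + 2 lock bits
const uintptr_t biased_lock_pattern       = 0x5;   // 101b: header is biased/biasable
const uintptr_t age_mask_in_place         = 0x78;  // 4 age bits, ignored by the test

// The prototype header carries the klass's bias pattern and epoch, the thread
// pointer occupies the high bits, and XOR-ing the mark with their union leaves
// only the fields that differ.
bool biased_to_current_thread(uintptr_t mark, uintptr_t prototype, uintptr_t thread) {
  if ((mark & biased_lock_mask_in_place) != biased_lock_pattern)
    return false;                                  // not biased at all: take cas_label
  return ((mark ^ (prototype | thread)) & ~age_mask_in_place) == 0;
}

In the 32-bit code the union is built with a second xorl rather than an or, which is equivalent because the prototype header's thread field is zero.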
 312 void MacroAssembler::call_VM_leaf_base(address entry_point,
 313                                        int number_of_arguments) {
 314   call(RuntimeAddress(entry_point));
 315   increment(rsp, number_of_arguments * wordSize);
 316 }
 317 
 318 void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
 319   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 320 }
 321 
 322 void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
 323   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 324 }
 325 
 326 void MacroAssembler::cmpoop(Address src1, jobject obj) {
 327   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
 328 }
 329 
 330 void MacroAssembler::cmpoop(Register src1, jobject obj) {
 331   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());


 709 
 710 Address MacroAssembler::as_Address(AddressLiteral adr) {
 711   // amd64 always does this as a pc-rel
 712   // we can be absolute or disp based on the instruction type
 713   // jmp/call are displacements others are absolute
 714   assert(!adr.is_lval(), "must be rval");
 715   assert(reachable(adr), "must be");
 716   return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
 717 
 718 }
 719 
 720 Address MacroAssembler::as_Address(ArrayAddress adr) {
 721   AddressLiteral base = adr.base();
 722   lea(rscratch1, base);
 723   Address index = adr.index();
 724   assert(index._disp == 0, "must not have disp"); // maybe it can?
 725   Address array(rscratch1, index._index, index._scale, index._disp);
 726   return array;
 727 }
 728 
 729 int MacroAssembler::biased_locking_enter(Register lock_reg,
 730                                          Register obj_reg,
 731                                          Register swap_reg,
 732                                          Register tmp_reg,
 733                                          bool swap_reg_contains_mark,
 734                                          Label& done,
 735                                          Label* slow_case,
 736                                          BiasedLockingCounters* counters) {
 737   assert(UseBiasedLocking, "why call this otherwise?");
 738   assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
 739   assert(tmp_reg != noreg, "tmp_reg must be supplied");
 740   assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
 741   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
 742   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
 743   Address saved_mark_addr(lock_reg, 0);
 744 
 745   if (PrintBiasedLockingStatistics && counters == NULL)
 746     counters = BiasedLocking::counters();
 747 
 748   // Biased locking
 749   // See whether the lock is currently biased toward our thread and
 750   // whether the epoch is still valid
 751   // Note that the runtime guarantees sufficient alignment of JavaThread
 752   // pointers to allow age to be placed into low bits
 753   // First check to see whether biasing is even enabled for this object
 754   Label cas_label;
 755   int null_check_offset = -1;
 756   if (!swap_reg_contains_mark) {
 757     null_check_offset = offset();
 758     movq(swap_reg, mark_addr);
 759   }
 760   movq(tmp_reg, swap_reg);
 761   andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
 762   cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
 763   jcc(Assembler::notEqual, cas_label);
 764   // The bias pattern is present in the object's header. Need to check
 765   // whether the bias owner and the epoch are both still current.
 766   load_prototype_header(tmp_reg, obj_reg);
 767   orq(tmp_reg, r15_thread);
 768   xorq(tmp_reg, swap_reg);
 769   andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
 770   if (counters != NULL) {
 771     cond_inc32(Assembler::zero,
 772                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
 773   }
 774   jcc(Assembler::equal, done);
 775 
 776   Label try_revoke_bias;
 777   Label try_rebias;
 778 
 779   // At this point we know that the header has the bias pattern and
 780   // that we are not the bias owner in the current epoch. We need to
 781   // figure out more details about the state of the header in order to
 782   // know what operations can be legally performed on the object's
 783   // header.
 784 
 785   // If the low three bits in the xor result aren't clear, that means
 786   // the prototype header is no longer biased and we have to revoke
 787   // the bias on this object.
 788   testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
 789   jcc(Assembler::notZero, try_revoke_bias);
 790 
 791   // Biasing is still enabled for this data type. See whether the
 792   // epoch of the current bias is still valid, meaning that the epoch
 793   // bits of the mark word are equal to the epoch bits of the
 794   // prototype header. (Note that the prototype header's epoch bits
 795   // only change at a safepoint.) If not, attempt to rebias the object
 796   // toward the current thread. Note that we must be absolutely sure
 797   // that the current epoch is invalid in order to do this because
 798   // otherwise the manipulations it performs on the mark word are
 799   // illegal.
 800   testq(tmp_reg, markOopDesc::epoch_mask_in_place);
 801   jcc(Assembler::notZero, try_rebias);
 802 
 803   // The epoch of the current bias is still valid but we know nothing
 804   // about the owner; it might be set or it might be clear. Try to
 805   // acquire the bias of the object using an atomic operation. If this
 806   // fails we will go in to the runtime to revoke the object's bias.
 807   // Note that we first construct the presumed unbiased header so we
 808   // don't accidentally blow away another thread's valid bias.
 809   andq(swap_reg,
 810        markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
 811   movq(tmp_reg, swap_reg);
 812   orq(tmp_reg, r15_thread);
 813   if (os::is_MP()) {
 814     lock();
 815   }
 816   cmpxchgq(tmp_reg, Address(obj_reg, 0));
 817   // If the biasing toward our thread failed, this means that
 818   // another thread succeeded in biasing it toward itself and we
 819   // need to revoke that bias. The revocation will occur in the
 820   // interpreter runtime in the slow case.
 821   if (counters != NULL) {
 822     cond_inc32(Assembler::zero,
 823                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
 824   }
 825   if (slow_case != NULL) {
 826     jcc(Assembler::notZero, *slow_case);
 827   }
 828   jmp(done);
 829 
 830   bind(try_rebias);
 831   // At this point we know the epoch has expired, meaning that the
 832   // current "bias owner", if any, is actually invalid. Under these
 833   // circumstances _only_, we are allowed to use the current header's
 834   // value as the comparison value when doing the cas to acquire the
 835   // bias in the current epoch. In other words, we allow transfer of
 836   // the bias from one thread to another directly in this situation.
 837   //
 838   // FIXME: due to a lack of registers we currently blow away the age
 839   // bits in this situation. Should attempt to preserve them.
 840   load_prototype_header(tmp_reg, obj_reg);
 841   orq(tmp_reg, r15_thread);
 842   if (os::is_MP()) {
 843     lock();
 844   }
 845   cmpxchgq(tmp_reg, Address(obj_reg, 0));
 846   // If the biasing toward our thread failed, then another thread
 847   // succeeded in biasing it toward itself and we need to revoke that
 848   // bias. The revocation will occur in the runtime in the slow case.
 849   if (counters != NULL) {
 850     cond_inc32(Assembler::zero,
 851                ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
 852   }
 853   if (slow_case != NULL) {
 854     jcc(Assembler::notZero, *slow_case);
 855   }
 856   jmp(done);
 857 
 858   bind(try_revoke_bias);
 859   // The prototype mark in the klass doesn't have the bias bit set any
 860   // more, indicating that objects of this data type are not supposed
 861   // to be biased any more. We are going to try to reset the mark of
 862   // this object to the prototype value and fall through to the
 863   // CAS-based locking scheme. Note that if our CAS fails, it means
 864   // that another thread raced us for the privilege of revoking the
 865   // bias of this particular object, so it's okay to continue in the
 866   // normal locking code.
 867   //
 868   // FIXME: due to a lack of registers we currently blow away the age
 869   // bits in this situation. Should attempt to preserve them.
 870   load_prototype_header(tmp_reg, obj_reg);
 871   if (os::is_MP()) {
 872     lock();
 873   }
 874   cmpxchgq(tmp_reg, Address(obj_reg, 0));
 875   // Fall through to the normal CAS-based lock, because no matter what
 876   // the result of the above CAS, some thread must have succeeded in
 877   // removing the bias bit from the object's header.
 878   if (counters != NULL) {
 879     cond_inc32(Assembler::zero,
 880                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
 881   }
 882 
 883   bind(cas_label);
 884 
 885   return null_check_offset;
 886 }
 887 
 888 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
 889   Label L, E;
 890 
 891 #ifdef _WIN64
 892   // Windows always allocates space for its register args
 893   assert(num_args <= 4, "only register arguments supported");
 894   subq(rsp,  frame::arg_reg_save_area_bytes);
 895 #endif
 896 
 897   // Align stack if necessary
 898   testl(rsp, 15);
 899   jcc(Assembler::zero, L);
 900 
 901   subq(rsp, 8);
 902   {
 903     call(RuntimeAddress(entry_point));
 904   }
 905   addq(rsp, 8);
 906   jmp(E);


1343   }
1344 }
1345 
1346 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
1347   // Used in sign-masking with aligned address.
1348   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
1349   if (reachable(src)) {
1350     Assembler::andps(dst, as_Address(src));
1351   } else {
1352     lea(rscratch1, src);
1353     Assembler::andps(dst, Address(rscratch1, 0));
1354   }
1355 }
1356 
1357 void MacroAssembler::andptr(Register dst, int32_t imm32) {
1358   LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
1359 }
1360 
1361 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
1362   pushf();
1363   if (os::is_MP())
1364     lock();
1365   incrementl(counter_addr);
1366   popf();
1367 }
1368 
1369 // Writes to stack successive pages until offset reached to check for
1370 // stack overflow + shadow pages.  This clobbers tmp.
1371 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1372   movptr(tmp, rsp);
1373   // Bang stack for total size given plus shadow page size.
1374   // Bang one page at a time because large size can bang beyond yellow and
1375   // red zones.
1376   Label loop;
1377   bind(loop);
1378   movl(Address(tmp, (-os::vm_page_size())), size );
1379   subptr(tmp, os::vm_page_size());
1380   subl(size, os::vm_page_size());
1381   jcc(Assembler::greater, loop);
1382 
1383   // Bang down shadow pages too.
1384   // At this point, (tmp-0) is the last address touched, so don't
1385   // touch it again.  (It was touched as (tmp-pagesize) but then tmp
1386   // was post-decremented.)  Skip this address by starting at i=1, and
1387   // touch a few more pages below.  N.B.  It is important to touch all
1388   // the way down to and including i=StackShadowPages.
1389   for (int i = 1; i <= StackShadowPages; i++) {
1390     // this could be any sized move but this can be a debugging crumb
1391     // so the bigger the better.
1392     movptr(Address(tmp, (-i*os::vm_page_size())), size );
1393   }
1394 }
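For reference, here is a rough C++ model of the walk bang_stack_size emits; page_size and shadow_pages stand in for os::vm_page_size() and StackShadowPages, and sp would be a pointer into a suitably large caller-provided region rather than the live stack.

#include <cstdint>

void bang_stack_size_model(char* sp, intptr_t size, intptr_t page_size, int shadow_pages) {
  char* tmp = sp;
  // Touch one word in every page covered by 'size'; the emitted loop is a
  // do/while, so at least one page is always banged.
  do {
    *reinterpret_cast<volatile intptr_t*>(tmp - page_size) = size;
    tmp  -= page_size;
    size -= page_size;
  } while (size > 0);
  // Then touch the shadow pages below, starting at i = 1 so the page that was
  // just written is not touched twice.
  for (int i = 1; i <= shadow_pages; i++) {
    *reinterpret_cast<volatile intptr_t*>(tmp - i * page_size) = size;
  }
}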
1395 
1396 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1397   assert(UseBiasedLocking, "why call this otherwise?");
1398 
1399   // Check for biased locking unlock case, which is a no-op
1400   // Note: we do not have to check the thread ID for two reasons.
1401   // First, the interpreter checks for IllegalMonitorStateException at
1402   // a higher level. Second, if the bias was revoked while we held the
1403   // lock, the object could not be rebiased toward another thread, so
1404   // the bias bit would be clear.
1405   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1406   andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
1407   cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
1408   jcc(Assembler::equal, done);
1409 }
1410 
1411 void MacroAssembler::c2bool(Register x) {
1412   // implements x == 0 ? 0 : 1
1413   // note: must only look at least-significant byte of x
1414   //       since C-style booleans are stored in one byte
1415   //       only! (was bug)
1416   andl(x, 0xFF);
1417   setb(Assembler::notZero, x);
1418 }
1419 
1420 // Wouldn't need if AddressLiteral version had new name
1421 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
1422   Assembler::call(L, rtype);
1423 }
1424 
1425 void MacroAssembler::call(Register entry) {
1426   Assembler::call(entry);
1427 }
1428 
1429 void MacroAssembler::call(AddressLiteral entry) {
1430   if (reachable(entry)) {




  81 };
  82 
  83 
  84 // Implementation of MacroAssembler
  85 
  86 // First all the versions that have distinct versions depending on 32/64 bit
  87 // Unless the difference is trivial (1 line or so).
  88 
  89 #ifndef _LP64
  90 
  91 // 32bit versions
  92 
  93 Address MacroAssembler::as_Address(AddressLiteral adr) {
  94   return Address(adr.target(), adr.rspec());
  95 }
  96 
  97 Address MacroAssembler::as_Address(ArrayAddress adr) {
  98   return Address::make_array(adr);
  99 }
 100 
 101 void MacroAssembler::call_VM_leaf_base(address entry_point,
 102                                        int number_of_arguments) {
 103   call(RuntimeAddress(entry_point));
 104   increment(rsp, number_of_arguments * wordSize);
 105 }
 106 
 107 void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
 108   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 109 }
 110 
 111 void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
 112   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 113 }
 114 
 115 void MacroAssembler::cmpoop(Address src1, jobject obj) {
 116   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
 117 }
 118 
 119 void MacroAssembler::cmpoop(Register src1, jobject obj) {
 120   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());


 498 
 499 Address MacroAssembler::as_Address(AddressLiteral adr) {
 500   // amd64 always does this as a pc-rel
 501   // we can be absolute or disp based on the instruction type
 502   // jmp/call are displacements others are absolute
 503   assert(!adr.is_lval(), "must be rval");
 504   assert(reachable(adr), "must be");
 505   return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
 506 
 507 }
 508 
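Since amd64 encodes these operands rip-relative, the interesting part is the displacement computed at line 505 above. A hedged helper with made-up addresses shows the arithmetic and the reachability constraint the asserts rely on; it is an illustration, not code from this file.

#include <cstdint>
#include <cassert>

// The encoded field is the signed 32-bit distance from the current emit
// position to the literal's target; reachable() guarantees it fits.
int32_t rip_relative_disp(uintptr_t target, uintptr_t pc) {
  intptr_t disp = (intptr_t)target - (intptr_t)pc;
  assert(disp == (intptr_t)(int32_t)disp && "target must be within +/- 2GB of pc");
  return (int32_t)disp;
}

// Example with made-up addresses:
//   rip_relative_disp(0x7f0000401000, 0x7f0000400f80) == 0x80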
 509 Address MacroAssembler::as_Address(ArrayAddress adr) {
 510   AddressLiteral base = adr.base();
 511   lea(rscratch1, base);
 512   Address index = adr.index();
 513   assert(index._disp == 0, "must not have disp"); // maybe it can?
 514   Address array(rscratch1, index._index, index._scale, index._disp);
 515   return array;
 516 }
 517 
 518 
 519 
 520 
 521 
 522 
 523 
 524 
 525 
 526 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
 527   Label L, E;
 528 
 529 #ifdef _WIN64
 530   // Windows always allocates space for its register args
 531   assert(num_args <= 4, "only register arguments supported");
 532   subq(rsp,  frame::arg_reg_save_area_bytes);
 533 #endif
 534 
 535   // Align stack if necessary
 536   testl(rsp, 15);
 537   jcc(Assembler::zero, L);
 538 
 539   subq(rsp, 8);
 540   {
 541     call(RuntimeAddress(entry_point));
 542   }
 543   addq(rsp, 8);
 544   jmp(E);


 981   }
 982 }
 983 
 984 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
 985   // Used in sign-masking with aligned address.
 986   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
 987   if (reachable(src)) {
 988     Assembler::andps(dst, as_Address(src));
 989   } else {
 990     lea(rscratch1, src);
 991     Assembler::andps(dst, Address(rscratch1, 0));
 992   }
 993 }
 994 
 995 void MacroAssembler::andptr(Register dst, int32_t imm32) {
 996   LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
 997 }
 998 
 999 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
1000   pushf();
1001   if (reachable(counter_addr)) {
1002     if (os::is_MP())
1003       lock();
1004     incrementl(as_Address(counter_addr));
1005   } else {
1006     lea(rscratch1, counter_addr);
1007     if (os::is_MP())
1008       lock();
1009     incrementl(Address(rscratch1, 0));
1010   }
1011   popf();
1012 }
1013 
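The reworked atomic_incl accepts any AddressLiteral: when the counter is rip-reachable it is incremented directly, otherwise the address is first materialized in rscratch1. A typical caller, mirroring the counter bumps in fast_lock further down, looks like this (counters here is a BiasedLockingCounters*):

  if (counters != NULL) {
    // atomic_incl hides the reachable vs. lea(rscratch1, ...) distinction.
    atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()));
  }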
1014 // Writes to stack successive pages until offset reached to check for
1015 // stack overflow + shadow pages.  This clobbers tmp.
1016 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1017   movptr(tmp, rsp);
1018   // Bang stack for total size given plus shadow page size.
1019   // Bang one page at a time because large size can bang beyond yellow and
1020   // red zones.
1021   Label loop;
1022   bind(loop);
1023   movl(Address(tmp, (-os::vm_page_size())), size );
1024   subptr(tmp, os::vm_page_size());
1025   subl(size, os::vm_page_size());
1026   jcc(Assembler::greater, loop);
1027 
1028   // Bang down shadow pages too.
1029   // At this point, (tmp-0) is the last address touched, so don't
1030   // touch it again.  (It was touched as (tmp-pagesize) but then tmp
1031   // was post-decremented.)  Skip this address by starting at i=1, and
1032   // touch a few more pages below.  N.B.  It is important to touch all
1033   // the way down to and including i=StackShadowPages.
1034   for (int i = 1; i <= StackShadowPages; i++) {
1035     // this could be any sized move but this can be a debugging crumb
1036     // so the bigger the better.
1037     movptr(Address(tmp, (-i*os::vm_page_size())), size );
1038   }
1039 }
1040 
1041 int MacroAssembler::biased_locking_enter(Register lock_reg,
1042                                          Register obj_reg,
1043                                          Register swap_reg,
1044                                          Register tmp_reg,
1045                                          bool swap_reg_contains_mark,
1046                                          Label& done,
1047                                          Label* slow_case,
1048                                          BiasedLockingCounters* counters) {
1049   assert(UseBiasedLocking, "why call this otherwise?");
1050   assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
1051   LP64_ONLY( assert(tmp_reg != noreg, "tmp_reg must be supplied"); )
1052   bool need_tmp_reg = false;
1053   if (tmp_reg == noreg) {
1054     need_tmp_reg = true;
1055     tmp_reg = lock_reg;
1056     assert_different_registers(lock_reg, obj_reg, swap_reg);
1057   } else {
1058     assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
1059   }
1060   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1061   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
1062   Address saved_mark_addr(lock_reg, 0);
1063 
1064   if (PrintBiasedLockingStatistics && counters == NULL) {
1065     counters = BiasedLocking::counters();
1066   }
1067   // Biased locking
1068   // See whether the lock is currently biased toward our thread and
1069   // whether the epoch is still valid
1070   // Note that the runtime guarantees sufficient alignment of JavaThread
1071   // pointers to allow age to be placed into low bits
1072   // First check to see whether biasing is even enabled for this object
1073   Label cas_label;
1074   int null_check_offset = -1;
1075   if (!swap_reg_contains_mark) {
1076     null_check_offset = offset();
1077     movptr(swap_reg, mark_addr);
1078   }
1079   if (need_tmp_reg) {
1080     push(tmp_reg);
1081   }
1082   movptr(tmp_reg, swap_reg);
1083   andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place);
1084   cmpptr(tmp_reg, markOopDesc::biased_lock_pattern);
1085   if (need_tmp_reg) {
1086     pop(tmp_reg);
1087   }
1088   jcc(Assembler::notEqual, cas_label);
1089   // The bias pattern is present in the object's header. Need to check
1090   // whether the bias owner and the epoch are both still current.
1091 #ifndef _LP64
1092   // Note that because there is no current thread register on x86_32 we
1093   // need to store off the mark word we read out of the object to
1094   // avoid reloading it and needing to recheck invariants below. This
1095   // store is unfortunate but it makes the overall code shorter and
1096   // simpler.
1097   movptr(saved_mark_addr, swap_reg);
1098 #endif
1099   if (need_tmp_reg) {
1100     push(tmp_reg);
1101   }
1102   if (swap_reg_contains_mark) {
1103     null_check_offset = offset();
1104   }
1105   load_prototype_header(tmp_reg, obj_reg);
1106 #ifdef _LP64
1107   orptr(tmp_reg, r15_thread);
1108   xorptr(tmp_reg, swap_reg);
1109   Register header_reg = tmp_reg;
1110 #else
1111   xorptr(tmp_reg, swap_reg);
1112   get_thread(swap_reg);
1113   xorptr(swap_reg, tmp_reg);
1114   Register header_reg = swap_reg;
1115 #endif
1116   andptr(header_reg, ~((int) markOopDesc::age_mask_in_place));
1117   if (need_tmp_reg) {
1118     pop(tmp_reg);
1119   }
1120   if (counters != NULL) {
1121     cond_inc32(Assembler::zero,
1122                ExternalAddress((address) counters->biased_lock_entry_count_addr()));
1123   }
1124   jcc(Assembler::equal, done);
1125 
1126   Label try_revoke_bias;
1127   Label try_rebias;
1128 
1129   // At this point we know that the header has the bias pattern and
1130   // that we are not the bias owner in the current epoch. We need to
1131   // figure out more details about the state of the header in order to
1132   // know what operations can be legally performed on the object's
1133   // header.
1134 
1135   // If the low three bits in the xor result aren't clear, that means
1136   // the prototype header is no longer biased and we have to revoke
1137   // the bias on this object.
1138   testptr(header_reg, markOopDesc::biased_lock_mask_in_place);
1139   jccb(Assembler::notZero, try_revoke_bias);
1140 
1141   // Biasing is still enabled for this data type. See whether the
1142   // epoch of the current bias is still valid, meaning that the epoch
1143   // bits of the mark word are equal to the epoch bits of the
1144   // prototype header. (Note that the prototype header's epoch bits
1145   // only change at a safepoint.) If not, attempt to rebias the object
1146   // toward the current thread. Note that we must be absolutely sure
1147   // that the current epoch is invalid in order to do this because
1148   // otherwise the manipulations it performs on the mark word are
1149   // illegal.
1150   testptr(header_reg, markOopDesc::epoch_mask_in_place);
1151   jccb(Assembler::notZero, try_rebias);
1152 
1153   // The epoch of the current bias is still valid but we know nothing
1154   // about the owner; it might be set or it might be clear. Try to
1155   // acquire the bias of the object using an atomic operation. If this
1156   // fails we will go in to the runtime to revoke the object's bias.
1157   // Note that we first construct the presumed unbiased header so we
1158   // don't accidentally blow away another thread's valid bias.
1159   NOT_LP64( movptr(swap_reg, saved_mark_addr); )
1160   andptr(swap_reg,
1161          markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
1162   if (need_tmp_reg) {
1163     push(tmp_reg);
1164   }
1165 #ifdef _LP64
1166   movptr(tmp_reg, swap_reg);
1167   orptr(tmp_reg, r15_thread);
1168 #else
1169   get_thread(tmp_reg);
1170   orptr(tmp_reg, swap_reg);
1171 #endif
1172   if (os::is_MP()) {
1173     lock();
1174   }
1175   cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
1176   if (need_tmp_reg) {
1177     pop(tmp_reg);
1178   }
1179   // If the biasing toward our thread failed, this means that
1180   // another thread succeeded in biasing it toward itself and we
1181   // need to revoke that bias. The revocation will occur in the
1182   // interpreter runtime in the slow case.
1183   if (counters != NULL) {
1184     cond_inc32(Assembler::zero,
1185                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
1186   }
1187   if (slow_case != NULL) {
1188     jcc(Assembler::notZero, *slow_case);
1189   }
1190   jmp(done);
1191 
1192   bind(try_rebias);
1193   // At this point we know the epoch has expired, meaning that the
1194   // current "bias owner", if any, is actually invalid. Under these
1195   // circumstances _only_, we are allowed to use the current header's
1196   // value as the comparison value when doing the cas to acquire the
1197   // bias in the current epoch. In other words, we allow transfer of
1198   // the bias from one thread to another directly in this situation.
1199   //
1200   // FIXME: due to a lack of registers we currently blow away the age
1201   // bits in this situation. Should attempt to preserve them.
1202   if (need_tmp_reg) {
1203     push(tmp_reg);
1204   }
1205   load_prototype_header(tmp_reg, obj_reg);
1206 #ifdef _LP64
1207   orptr(tmp_reg, r15_thread);
1208 #else
1209   get_thread(swap_reg);
1210   orptr(tmp_reg, swap_reg);
1211   movptr(swap_reg, saved_mark_addr);
1212 #endif
1213   if (os::is_MP()) {
1214     lock();
1215   }
1216   cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
1217   if (need_tmp_reg) {
1218     pop(tmp_reg);
1219   }
1220   // If the biasing toward our thread failed, then another thread
1221   // succeeded in biasing it toward itself and we need to revoke that
1222   // bias. The revocation will occur in the runtime in the slow case.
1223   if (counters != NULL) {
1224     cond_inc32(Assembler::zero,
1225                ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
1226   }
1227   if (slow_case != NULL) {
1228     jcc(Assembler::notZero, *slow_case);
1229   }
1230   jmp(done);
1231 
1232   bind(try_revoke_bias);
1233   // The prototype mark in the klass doesn't have the bias bit set any
1234   // more, indicating that objects of this data type are not supposed
1235   // to be biased any more. We are going to try to reset the mark of
1236   // this object to the prototype value and fall through to the
1237   // CAS-based locking scheme. Note that if our CAS fails, it means
1238   // that another thread raced us for the privilege of revoking the
1239   // bias of this particular object, so it's okay to continue in the
1240   // normal locking code.
1241   //
1242   // FIXME: due to a lack of registers we currently blow away the age
1243   // bits in this situation. Should attempt to preserve them.
1244   NOT_LP64( movptr(swap_reg, saved_mark_addr); )
1245   if (need_tmp_reg) {
1246     push(tmp_reg);
1247   }
1248   load_prototype_header(tmp_reg, obj_reg);
1249   if (os::is_MP()) {
1250     lock();
1251   }
1252   cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
1253   if (need_tmp_reg) {
1254     pop(tmp_reg);
1255   }
1256   // Fall through to the normal CAS-based lock, because no matter what
1257   // the result of the above CAS, some thread must have succeeded in
1258   // removing the bias bit from the object's header.
1259   if (counters != NULL) {
1260     cond_inc32(Assembler::zero,
1261                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
1262   }
1263 
1264   bind(cas_label);
1265 
1266   return null_check_offset;
1267 }
1268 
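The assert at the top of the merged routine ("biased locking makes assumptions about bit layout") refers to the mark word's low bits. As an illustrative sketch only, with the authoritative constants living in markOopDesc:

// Least-significant end of the mark word:
//   [ thread / hash (rest) | epoch:2 | age:4 | biased_lock:1 | lock:2 ]
enum : int {
  lock_bits_sketch        = 2,
  biased_lock_bits_sketch = 1,
  age_bits_sketch         = 4,
  epoch_bits_sketch       = 2,
  age_shift_sketch        = lock_bits_sketch + biased_lock_bits_sketch  // == 3
};
static_assert(age_shift_sketch == 3,
              "mirrors the layout assert in biased_locking_enter");

The note about JavaThread alignment means the thread bits stay clear of the age field, which is what lets the code mask the age bits off and compare the rest directly.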
1269 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1270   assert(UseBiasedLocking, "why call this otherwise?");
1271 
1272   // Check for biased locking unlock case, which is a no-op
1273   // Note: we do not have to check the thread ID for two reasons.
1274   // First, the interpreter checks for IllegalMonitorStateException at
1275   // a higher level. Second, if the bias was revoked while we held the
1276   // lock, the object could not be rebiased toward another thread, so
1277   // the bias bit would be clear.
1278   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1279   andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
1280   cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
1281   jcc(Assembler::equal, done);
1282 }
1283 
1284 #ifdef COMPILER2
1285 // Fast_Lock and Fast_Unlock used by C2
1286 
1287 // Because the transitions from emitted code to the runtime
1288 // monitorenter/exit helper stubs are so slow it's critical that
1289 // we inline both the stack-locking fast-path and the inflated fast path.
1290 //
1291 // See also: cmpFastLock and cmpFastUnlock.
1292 //
1293 // What follows is a specialized inline transliteration of the code
1294 // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
1295 // another option would be to emit TrySlowEnter and TrySlowExit methods
1296 // at startup-time.  These methods would accept arguments as
1297 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
1298 // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
1299 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
1300 // In practice, however, the # of lock sites is bounded and is usually small.
1301 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
1302 // if the processor uses simple bimodal branch predictors keyed by EIP,
1303 // since the helper routines would be called from multiple synchronization
1304 // sites.
1305 //
1306 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
1307 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
1308 // to those specialized methods.  That'd give us a mostly platform-independent
1309 // implementation that the JITs could optimize and inline at their pleasure.
1310 // Done correctly, the only time we'd need to cross to native code would be
1311 // to park() or unpark() threads.  We'd also need a few more unsafe operators
1312 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
1313 // (b) explicit barriers or fence operations.
1314 //
1315 // TODO:
1316 //
1317 // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
1318 //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
1319 //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
1320 //    the lock operators would typically be faster than reifying Self.
1321 //
1322 // *  Ideally I'd define the primitives as:
1323 //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
1324 //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
1325 //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
1326 //    Instead, we're stuck with a rather awkward and brittle register assignments below.
1327 //    Furthermore the register assignments are overconstrained, possibly resulting in
1328 //    sub-optimal code near the synchronization site.
1329 //
1330 // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
1331 //    Alternately, use a better sp-proximity test.
1332 //
1333 // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
1334 //    Either one is sufficient to uniquely identify a thread.
1335 //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
1336 //
1337 // *  Intrinsify notify() and notifyAll() for the common cases where the
1338 //    object is locked by the calling thread but the waitlist is empty.
1339 //    This avoids the expensive JNI calls to JVM_Notify() and JVM_NotifyAll().
1340 //
1341 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
1342 //    But beware of excessive branch density on AMD Opterons.
1343 //
1344 // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
1345 //    or failure of the fast-path.  If the fast-path fails then we pass
1346 //    control to the slow-path, typically in C.  In Fast_Lock and
1347 //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
1348 //    will emit a conditional branch immediately after the node.
1349 //    So we have branches to branches and lots of ICC.ZF games.
1350 //    Instead, it might be better to have C2 pass a "FailureLabel"
1351 //    into Fast_Lock and Fast_Unlock.  In the case of success, control
1352 //    will drop through the node.  ICC.ZF is undefined at exit.
1353 //    In the case of failure, the node will branch directly to the
1354 //    FailureLabel
1355 
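A hedged sketch of how a call site would honor the ZFlag contract described above; the real consumers are C2's cmpFastLock/cmpFastUnlock nodes, and objReg, boxReg, scrReg and counters stand in for whatever the site has on hand.

  Label slow_path;
  // tmpReg must be rax (see the guarantees at the top of fast_lock below).
  fast_lock(objReg, boxReg, rax, scrReg, counters);  // ZF == 1 on fast-path success
  jcc(Assembler::notZero, slow_path);                // ZF == 0: fall back to the runtime
  // ... fast path succeeded: the object is now locked by this thread ...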
1356 
1357 // obj: object to lock
1358 // box: on-stack box address (displaced header location) - KILLED
1359 // rax,: tmp -- KILLED
1360 // scr: tmp -- KILLED
1361 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, BiasedLockingCounters* counters) {
1362   // Ensure the register assignments are disjoint
1363   guarantee (objReg != boxReg, "");
1364   guarantee (objReg != tmpReg, "");
1365   guarantee (objReg != scrReg, "");
1366   guarantee (boxReg != tmpReg, "");
1367   guarantee (boxReg != scrReg, "");
1368   guarantee (tmpReg == rax, "");
1369 
1370   if (counters != NULL) {
1371     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()));
1372   }
1373   if (EmitSync & 1) {
1374       // set box->dhw = unused_mark (3)
1375       // Force all sync thru slow-path: slow_enter() and slow_exit()
1376       movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1377       cmpptr (rsp, (int32_t)NULL_WORD);
1378   } else
1379   if (EmitSync & 2) {
1380       Label DONE_LABEL ;
1381       if (UseBiasedLocking) {
1382          // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
1383          biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
1384       }
1385 
1386       movptr(tmpReg, Address(objReg, 0));           // fetch markword
1387       orptr (tmpReg, 0x1);
1388       movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
1389       if (os::is_MP()) {
1390         lock();
1391       }
1392       cmpxchgptr(boxReg, Address(objReg, 0));       // Updates tmpReg
1393       jccb(Assembler::equal, DONE_LABEL);
1394       // Recursive locking
1395       subptr(tmpReg, rsp);
1396       andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
1397       movptr(Address(boxReg, 0), tmpReg);
1398       bind(DONE_LABEL);
1399   } else {
1400     // Possible cases that we'll encounter in fast_lock
1401     // ------------------------------------------------
1402     // * Inflated
1403     //    -- unlocked
1404     //    -- Locked
1405     //       = by self
1406     //       = by other
1407     // * biased
1408     //    -- by Self
1409     //    -- by other
1410     // * neutral
1411     // * stack-locked
1412     //    -- by self
1413     //       = sp-proximity test hits
1414     //       = sp-proximity test generates false-negative
1415     //    -- by other
1416     //
1417 
1418     Label IsInflated, DONE_LABEL;
1419 
1420     // it's stack-locked, biased or neutral
1421     // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
1422     // order to reduce the number of conditional branches in the most common cases.
1423     // Beware -- there's a subtle invariant that fetch of the markword
1424     // at [FETCH], below, will never observe a biased encoding (*101b).
1425     // If this invariant is not held we risk exclusion (safety) failure.
1426     if (UseBiasedLocking && !UseOptoBiasInlining) {
1427       biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
1428     }
1429 
1430     movptr(tmpReg, Address(objReg, 0));          // [FETCH]
1431     testl (tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
1432     jccb  (Assembler::notZero, IsInflated);
1433 
1434     // Attempt stack-locking ...
1435     orptr (tmpReg, 0x1);
1436     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
1437     if (os::is_MP()) {
1438       lock();
1439     }
1440     cmpxchgptr(boxReg, Address(objReg, 0));      // Updates tmpReg
1441     if (counters != NULL) {
1442       cond_inc32(Assembler::equal,
1443                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
1444     }
1445     jccb(Assembler::equal, DONE_LABEL);
1446 
1447     // Recursive locking
1448     subptr(tmpReg, rsp);
1449     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
1450     movptr(Address(boxReg, 0), tmpReg);
1451     if (counters != NULL) {
1452       cond_inc32(Assembler::equal,
1453                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
1454     }
1455     jmpb(DONE_LABEL);
1456 
1457     bind(IsInflated);
1458 #ifndef _LP64
1459     // The object is inflated.
1460     //
1461     // TODO-FIXME: eliminate the ugly use of manifest constants:
1462     //   Use markOopDesc::monitor_value instead of "2".
1463     //   Use markOopDesc::unused_mark() instead of "3".
1464     // The tmpReg value is an objectMonitor reference ORed with
1465     // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
1466     // objectmonitor pointer by masking off the "2" bit or we can just
1467     // use tmpReg as an objectmonitor pointer but bias the objectmonitor
1468     // field offsets with "-2" to compensate for and annul the low-order tag bit.
1469     //
1470     // I use the latter as it avoids AGI stalls.
1471     // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
1472     // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
1473     //
1474     #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
1475 
1476     // boxReg refers to the on-stack BasicLock in the current frame.
1477     // We'd like to write:
1478     //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
1479     // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
1480     // additional latency as we have another ST in the store buffer that must drain.
1481 
1482     if (EmitSync & 8192) {
1483        movptr(Address(boxReg, 0), 3);            // results in ST-before-CAS penalty
1484        get_thread (scrReg);
1485        movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
1486        movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
1487        if (os::is_MP()) {
1488          lock();
1489        }
1490        cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
1491     } else
1492     if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
1493        movptr(scrReg, boxReg);
1494        movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
1495 
1496        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1497        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1498           // prefetchw [eax + Offset(_owner)-2]
1499           prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
1500        }
1501 
1502        if ((EmitSync & 64) == 0) {
1503          // Optimistic form: consider XORL tmpReg,tmpReg
1504          movptr(tmpReg, NULL_WORD);
1505        } else {
1506          // Can suffer RTS->RTO upgrades on shared or cold $ lines
1507          // Test-And-CAS instead of CAS
1508          movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));   // rax, = m->_owner
1509          testptr(tmpReg, tmpReg);                   // Locked ?
1510          jccb  (Assembler::notZero, DONE_LABEL);
1511        }
1512 
1513        // Appears unlocked - try to swing _owner from null to non-null.
1514        // Ideally, I'd manifest "Self" with get_thread and then attempt
1515        // to CAS the register containing Self into m->Owner.
1516        // But we don't have enough registers, so instead we can either try to CAS
1517        // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
1518        // we later store "Self" into m->Owner.  Transiently storing a stack address
1519        // (rsp or the address of the box) into  m->owner is harmless.
1520        // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1521        if (os::is_MP()) {
1522          lock();
1523        }
1524        cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
1525        movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
1526        jccb  (Assembler::notZero, DONE_LABEL);
1527        get_thread (scrReg);                    // beware: clobbers ICCs
1528        movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg);
1529        xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
1530 
1531        // If the CAS fails we can either retry or pass control to the slow-path.
1532        // We use the latter tactic.
1533        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1534        // If the CAS was successful ...
1535        //   Self has acquired the lock
1536        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1537        // Intentional fall-through into DONE_LABEL ...
1538     } else {
1539        movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark()));  // results in ST-before-CAS penalty
1540        movptr(boxReg, tmpReg);
1541 
1542        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1543        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1544           // prefetchw [eax + Offset(_owner)-2]
1545           prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
1546        }
1547 
1548        if ((EmitSync & 64) == 0) {
1549          // Optimistic form
1550          xorptr  (tmpReg, tmpReg);
1551        } else {
1552          // Can suffer RTS->RTO upgrades on shared or cold $ lines
1553          movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));   // rax, = m->_owner
1554          testptr(tmpReg, tmpReg);                   // Locked ?
1555          jccb  (Assembler::notZero, DONE_LABEL);
1556        }
1557 
1558        // Appears unlocked - try to swing _owner from null to non-null.
1559        // Use either "Self" (in scr) or rsp as thread identity in _owner.
1560        // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1561        get_thread (scrReg);
1562        if (os::is_MP()) {
1563          lock();
1564        }
1565        cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
1566 
1567        // If the CAS fails we can either retry or pass control to the slow-path.
1568        // We use the latter tactic.
1569        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1570        // If the CAS was successful ...
1571        //   Self has acquired the lock
1572        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1573        // Intentional fall-through into DONE_LABEL ...
1574     }
1575 #else // _LP64
1576     // It's inflated
1577 
1578     // TODO: someday avoid the ST-before-CAS penalty by
1579     // relocating (deferring) the following ST.
1580     // We should also think about trying a CAS without having
1581     // fetched _owner.  If the CAS is successful we may
1582     // avoid an RTO->RTS upgrade on the $line.
1583 
1584     // Without cast to int32_t a movptr will destroy r10 which is typically obj
1585     movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1586 
1587     mov    (boxReg, tmpReg);
1588     movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
1589     testptr(tmpReg, tmpReg);
1590     jccb   (Assembler::notZero, DONE_LABEL);
1591 
1592     // It's inflated and appears unlocked
1593     if (os::is_MP()) {
1594       lock();
1595     }
1596     cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
1597     // Intentional fall-through into DONE_LABEL ...
1598 
1599 #endif
1600 
1601     // DONE_LABEL is a hot target - we'd really like to place it at the
1602     // start of cache line by padding with NOPs.
1603     // See the AMD and Intel software optimization manuals for the
1604     // most efficient "long" NOP encodings.
1605     // Unfortunately none of our alignment mechanisms suffice.
1606     bind(DONE_LABEL);
1607 
1608     // At DONE_LABEL the icc ZFlag is set as follows ...
1609     // Fast_Unlock uses the same protocol.
1610     // ZFlag == 1 -> Success
1611     // ZFlag == 0 -> Failure - force control through the slow-path
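         //
         // For illustration only - a sketch of how a caller would typically consume
         // this protocol (the actual C2 expansion may differ; SLOW_PATH is a
         // placeholder label):
         //   fast_lock(obj, box, tmp, scr, ...);
         //   jcc(Assembler::notZero, SLOW_PATH);   // ZF==0 -> take the runtime slow-path
         //   ... critical section ...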
1612   }
1613 }
1614 
1615 // obj: object to unlock
1616 // box: box address (displaced header location), killed.  Must be EAX.
1617 // tmp: killed, cannot be obj nor box.
1618 //
1619 // Some commentary on balanced locking:
1620 //
1621 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
1622 // Methods that don't have provably balanced locking are forced to run in the
1623 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
1624 // The interpreter provides two properties:
1625 // I1:  At return-time the interpreter automatically and quietly unlocks any
1626 //      objects acquired by the current activation (frame).  Recall that the
1627 //      interpreter maintains an on-stack list of locks currently held by
1628 //      a frame.
1629 // I2:  If a method attempts to unlock an object that is not held by the
1630 //      frame, the interpreter throws IMSX.
1631 //
1632 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
1633 // B() doesn't have provably balanced locking so it runs in the interpreter.
1634 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
1635 // is still locked by A().
1636 //
1637 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
1638 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
1639 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
1640 // doesn't say what will happen if a program engages in such mixed-mode locking, however.
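     //
     // To make I1/I2 concrete (illustration only, Java-level pseudo-code):
     //   synchronized (o) {   // A(): compiled; enters via Fast_Lock
     //     B();               // B(): not provably balanced -> runs interpreted
     //   }                    // A(): exits via Fast_Unlock; by I1/I2, o is still locked by A() here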
1641 
1642 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
1643   guarantee (objReg != boxReg, "");
1644   guarantee (objReg != tmpReg, "");
1645   guarantee (boxReg != tmpReg, "");
1646   guarantee (boxReg == rax, "");
1647 
1648   if (EmitSync & 4) {
1649     // Disable - inhibit all inlining.  Force control through the slow-path
1650     cmpptr (rsp, 0);
1651   } else
1652   if (EmitSync & 8) {
1653     Label DONE_LABEL;
1654     if (UseBiasedLocking) {
1655        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
1656     }
1657     // Classic stack-locking code ...
1658     // Check whether the displaced header is 0
1659     //(=> recursive unlock)
1660     movptr(tmpReg, Address(boxReg, 0));
1661     testptr(tmpReg, tmpReg);
1662     jccb(Assembler::zero, DONE_LABEL);
1663     // If not recursive lock, reset the header to displaced header
1664     if (os::is_MP()) {
1665       lock();
1666     }
1667     cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses RAX which is box
1668     bind(DONE_LABEL);
1669   } else {
1670     Label DONE_LABEL, Stacked, CheckSucc;
1671 
1672     // Critically, the biased locking test must have precedence over
1673     // and appear before the (box->dhw == 0) recursive stack-lock test.
1674     if (UseBiasedLocking && !UseOptoBiasInlining) {
1675        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
1676     }
1677 
1678     cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
1679     movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
1680     jccb  (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
1681 
1682     testptr(tmpReg, 0x02);                          // Inflated?
1683     jccb  (Assembler::zero, Stacked);
1684 
1685     // It's inflated.
1686     // Despite our balanced locking property we still check that m->_owner == Self
1687     // as java routines or native JNI code called by this thread might
1688     // have released the lock.
1689     // Refer to the comments in synchronizer.cpp for how we might encode extra
1690     // state in _succ so we can avoid fetching EntryList|cxq.
1691     //
1692     // I'd like to add more cases in fast_lock() and fast_unlock() --
1693     // such as recursive enter and exit -- but we have to be wary of
1694     // I$ bloat, T$ effects and BP$ effects.
1695     //
1696     // If there's no contention try a 1-0 exit.  That is, exit without
1697     // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
1698     // we detect and recover from the race that the 1-0 exit admits.
1699     //
1700     // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
1701     // before it STs null into _owner, releasing the lock.  Updates
1702     // to data protected by the critical section must be visible before
1703     // we drop the lock (and thus before any other thread could acquire
1704     // the lock and observe the fields protected by the lock).
1705     // IA32's memory-model is TSO, so STs are ordered with respect to
1706     // each other and there's no need for an explicit barrier (fence).
1707     // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
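         //
         // A rough C-level sketch of the 1-0 exit emitted below (illustration only):
         //   if (m->_owner == Self && m->_recursions == 0 &&
         //       m->_EntryList == NULL && m->_cxq == NULL) {
         //     m->_owner = NULL;      // plain ST releases the lock - no CAS, no MEMBAR
         //   } else {
         //     // fall into the CheckSucc / slow-path logic
         //   }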
1708 #ifndef _LP64
1709     get_thread (boxReg);
1710     if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1711       // prefetchw [ebx + Offset(_owner)-2]
1712       prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
1713     }
1714 
1715     // Note that we could employ various encoding schemes to reduce
1716     // the number of loads below (currently 4) to just 2 or 3.
1717     // Refer to the comments in synchronizer.cpp.
1718     // In practice the chain of fetches doesn't seem to impact performance, however.
1719     if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
1720        // Attempt to reduce branch density to help AMD's branch predictor.
1721        xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
1722        orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
1723        orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
1724        orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
1725        jccb  (Assembler::notZero, DONE_LABEL);
1726        movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
1727        jmpb  (DONE_LABEL);
1728     } else {
1729        xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
1730        orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
1731        jccb  (Assembler::notZero, DONE_LABEL);
1732        movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
1733        orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
1734        jccb  (Assembler::notZero, CheckSucc);
1735        movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
1736        jmpb  (DONE_LABEL);
1737     }
1738 
1739     // The following code fragment (EmitSync & 65536) improves the performance of
1740     // contended applications and contended synchronization microbenchmarks.
1741     // Unfortunately the emission of the code - even though not executed - causes regressions
1742     // in scimark and jetstream, evidently because of $ effects.  Replacing the code
1743     // with an equal number of never-executed NOPs results in the same regression.
1744     // We leave it off by default.
1745 
1746     if ((EmitSync & 65536) != 0) {
1747        Label LSuccess, LGoSlowPath ;
1748 
1749        bind  (CheckSucc);
1750 
1751        // Optional pre-test ... it's safe to elide this
1752        if ((EmitSync & 16) == 0) {
1753           cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
1754           jccb  (Assembler::zero, LGoSlowPath);
1755        }
1756 
1757        // We have a classic Dekker-style idiom:
1758        //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
1759        // There are a number of ways to implement the barrier:
1760        // (1) lock:andl &m->_owner, 0
1761        //     is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
1762        //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
1763        //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
1764        // (2) If supported, an explicit MFENCE is appealing.
1765        //     In older IA32 processors MFENCE is slower than lock:add or xchg
1766        //     particularly if the write-buffer is full as might be the case if
1767        //     stores closely precede the fence or fence-equivalent instruction.
1768        //     In more modern implementations MFENCE appears faster, however.
1769        // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
1770        //     The $lines underlying the top-of-stack should be in M-state.
1771        //     The locked add instruction is serializing, of course.
1772        // (4) Use xchg, which is serializing
1773        //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
1774        // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
1775        //     The integer condition codes will tell us if succ was 0.
1776        //     Since _succ and _owner should reside in the same $line and
1777        //     we just stored into _owner, it's likely that the $line
1778        //     remains in M-state for the lock:orl.
1779        //
1780        // We currently use (3); switching to (2) is likely the better choice
1781        // on future processors.
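            //
            // The barrier matters because a contending thread executes the mirror-image
            // idiom (roughly: ST m->_succ = Self; MEMBAR; LD m->_owner) before parking.
            // Without the fence both threads could read stale values and a successor
            // could be stranded.  See the exit protocol commentary in synchronizer.cpp.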
1782 
1783        movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
1784        if (os::is_MP()) {
1785           if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
1786             mfence();
1787           } else {
1788             lock (); addptr(Address(rsp, 0), 0);
1789           }
1790        }
1791        // Ratify _succ remains non-null
1792        cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0);
1793        jccb  (Assembler::notZero, LSuccess);
1794 
1795        xorptr(boxReg, boxReg);                  // box is really EAX
1796        if (os::is_MP()) { lock(); }
1797        cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
1798        jccb  (Assembler::notEqual, LSuccess);
1799        // Since we're low on registers we installed rsp as a placeholder in _owner.
1800        // Now install Self over rsp.  This is safe as we're transitioning from
1801        // non-null to non-null.
1802        get_thread (boxReg);
1803        movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
1804        // Intentional fall-through into LGoSlowPath ...
1805 
1806        bind  (LGoSlowPath);
1807        orptr(boxReg, 1);                      // set ICC.ZF=0 to indicate failure
1808        jmpb  (DONE_LABEL);
1809 
1810        bind  (LSuccess);
1811        xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
1812        jmpb  (DONE_LABEL);
1813     }
1814 
1815     bind (Stacked);
1816     // It's not inflated and it's not recursively stack-locked and it's not biased.
1817     // It must be stack-locked.
1818     // Try to reset the header to displaced header.
1819     // The "box" value on the stack is stable, so we can reload
1820     // and be assured we observe the same value as above.
1821     movptr(tmpReg, Address(boxReg, 0));
1822     if (os::is_MP()) {
1823       lock();
1824     }
1825     cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
1826     // Intentional fall-through into DONE_LABEL
1827 
1828     // DONE_LABEL is a hot target - we'd really like to place it at the
1829     // start of a cache line by padding with NOPs.
1830     // See the AMD and Intel software optimization manuals for the
1831     // most efficient "long" NOP encodings.
1832     // Unfortunately none of our alignment mechanisms suffice.
1833     if ((EmitSync & 65536) == 0) {
1834        bind (CheckSucc);
1835     }
1836 #else // _LP64
1837     // It's inflated
1838     movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
1839     xorptr(boxReg, r15_thread);
1840     orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
1841     jccb  (Assembler::notZero, DONE_LABEL);
1842     movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
1843     orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
1844     jccb  (Assembler::notZero, CheckSucc);
1845     movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
1846     jmpb  (DONE_LABEL);
1847 
1848     if ((EmitSync & 65536) == 0) {
1849       Label LSuccess, LGoSlowPath ;
1850       bind  (CheckSucc);
1851       cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
1852       jccb  (Assembler::zero, LGoSlowPath);
1853 
1854       // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
1855      // explicit ST;MEMBAR combination, but masm doesn't currently support
1856      // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc.
1857       // are all faster when the write buffer is populated.
1858       movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
1859       if (os::is_MP()) {
1860          lock (); addl (Address(rsp, 0), 0);
1861       }
1862       cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
1863       jccb  (Assembler::notZero, LSuccess);
1864 
1865       movptr (boxReg, (int32_t)NULL_WORD);                   // box is really EAX
1866       if (os::is_MP()) { lock(); }
1867       cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
1868       jccb  (Assembler::notEqual, LSuccess);
1869       // Intentional fall-through into slow-path
1870 
1871       bind  (LGoSlowPath);
1872       orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
1873       jmpb  (DONE_LABEL);
1874 
1875       bind  (LSuccess);
1876       testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
1877       jmpb  (DONE_LABEL);
1878     }
1879 
1880     bind  (Stacked);
1881     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
1882     if (os::is_MP()) { lock(); }
1883     cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
1884 
1885     if (EmitSync & 65536) {
1886        bind (CheckSucc);
1887     }
1888 #endif
1889     bind(DONE_LABEL);
1890     // Avoid branch to branch on AMD processors
1891     if (EmitSync & 32768) {
1892        nop();
1893     }
1894   }
1895 }
1896 #endif // COMPILER2
1897 
1898 void MacroAssembler::c2bool(Register x) {
1899   // implements x == 0 ? 0 : 1
1900   // note: must only look at the least-significant byte of x
1901   //       since C-style booleans are stored in one byte
1902   //       only! (was bug)
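       //       In C terms the emitted sequence is roughly:
       //         x = ((x & 0xFF) != 0) ? 1 : 0;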
1903   andl(x, 0xFF);
1904   setb(Assembler::notZero, x);
1905 }
1906 
1907 // Wouldn't be needed if the AddressLiteral version had a new name
1908 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
1909   Assembler::call(L, rtype);
1910 }
1911 
1912 void MacroAssembler::call(Register entry) {
1913   Assembler::call(entry);
1914 }
1915 
1916 void MacroAssembler::call(AddressLiteral entry) {
1917   if (reachable(entry)) {

