src/cpu/ppc/vm/macroAssembler_ppc.cpp

rev 8107 : 8077838: Recent developments for ppc.
   1 /*
   2  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright 2012, 2014 SAP AG. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *


1438 }
1439 
1440 // Performs an atomic compare-exchange:
1441 //   if (compare_value == *addr_base)
1442 //     *addr_base = exchange_value
1443 //     int_flag_success = 1;
1444 //   else
1445 //     int_flag_success = 0;
1446 //
1447 // ConditionRegister flag       = cmp(compare_value, *addr_base)
1448 // Register dest_current_value  = *addr_base
1449 // Register compare_value       Used to compare with value in memory
1450 // Register exchange_value      Written to memory if compare_value == *addr_base
1451 // Register addr_base           The memory location to compareXChange
1452 // Register int_flag_success    Set to 1 if exchange_value was written to *addr_base
1453 //
1454 // To avoid the costly compare-exchange, the value is tested beforehand (contention hint).
1455 // Several special cases exist to avoid generating unnecessary code.
1456 //
1457 void MacroAssembler::cmpxchgd(ConditionRegister flag,
1458                               Register dest_current_value, Register compare_value, Register exchange_value,
1459                               Register addr_base, int semantics, bool cmpxchgx_hint,
1460                               Register int_flag_success, Label* failed_ext, bool contention_hint) {
1461   Label retry;
1462   Label failed_int;
1463   Label& failed = (failed_ext != NULL) ? *failed_ext : failed_int;
1464   Label done;
1465 
1466   // Save one branch if result is returned via register and result register is different from the other ones.
1467   bool use_result_reg    = (int_flag_success!=noreg);
1468   bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value &&
1469                             int_flag_success!=exchange_value && int_flag_success!=addr_base);
1470   assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
1471 
1472   // release/fence semantics
1473   if (semantics & MemBarRel) {
1474     release();
1475   }
1476 
1477   if (use_result_reg && preset_result_reg) {
1478     li(int_flag_success, 0); // preset (assume cas failed)
1479   }
1480 
1481   // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
1482   if (contention_hint) { // Don't try to reserve if cmp fails.
1483     ld(dest_current_value, 0, addr_base);
1484     cmpd(flag, dest_current_value, compare_value);
1485     bne(flag, failed);
1486   }
1487 
1488   // atomic emulation loop
1489   bind(retry);
1490 
1491   ldarx(dest_current_value, addr_base, cmpxchgx_hint);
1492   cmpd(flag, dest_current_value, compare_value);
1493   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1494     bne_predict_not_taken(flag, failed);
1495   } else {
1496     bne(                  flag, failed);
1497   }
1498 
1499   stdcx_(exchange_value, addr_base);
1500   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1501     bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
1502   } else {
1503     bne(                  CCR0, retry); // stXcx_ sets CCR0
1504   }
1505 
1506   // result in register (must do this at the end because int_flag_success can be the same register as one above)
1507   if (use_result_reg) {
1508     li(int_flag_success, 1);
1509   }
1510 
1511   // POWER6 doesn't need isync in CAS.
1512   // Always emit isync to be on the safe side.
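
Reviewer note: the ldarx/cmpd/stdcx_ loop above implements a full 64-bit CAS via load-reserve/store-conditional. A minimal C++ sketch of the semantics it provides; the helper name and the use of std::atomic are illustrative only, not part of this change:

#include <atomic>
#include <cstdint>

// Hypothetical helper mirroring cmpxchgd's retry loop.
static bool cmpxchgd_sketch(std::atomic<int64_t>* addr_base,
                            int64_t compare_value, int64_t exchange_value,
                            int64_t* dest_current_value) {
  int64_t expected = compare_value;
  // compare_exchange_strong plays the role of the ldarx/cmpd/stdcx_ loop,
  // including the bne(CCR0, retry) on a lost reservation.
  bool success = addr_base->compare_exchange_strong(expected, exchange_value);
  *dest_current_value = expected;  // current memory value, as in dest_current_value
  return success;                  // corresponds to int_flag_success = 1/0
}
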


1856   // rotate epoch bits to right (little) end and set other bits to 0
1857   // [ big part | epoch | little part ] -> [ 0..0 | epoch ]
1858   rldicl_(temp2_reg, temp_reg, shift_amount, 64 - markOopDesc::epoch_bits);
1859   // branch if epoch bits are != 0, i.e. they differ, because the epoch has been incremented
1860   bne(CCR0, try_rebias);
1861 
1862   // The epoch of the current bias is still valid but we know nothing
1863   // about the owner; it might be set or it might be clear. Try to
1864   // acquire the bias of the object using an atomic operation. If this
1865   // fails we will go in to the runtime to revoke the object's bias.
1866   // Note that we first construct the presumed unbiased header so we
1867   // don't accidentally blow away another thread's valid bias.
1868   andi(mark_reg, mark_reg, (markOopDesc::biased_lock_mask_in_place |
1869                                 markOopDesc::age_mask_in_place |
1870                                 markOopDesc::epoch_mask_in_place));
1871   orr(temp_reg, R16_thread, mark_reg);
1872 
1873   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1874 
1875   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
1876   fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
1877   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1878            /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1879            /*where=*/obj_reg,
1880            MacroAssembler::MemBarAcq,
1881            MacroAssembler::cmpxchgx_hint_acquire_lock(),
1882            noreg, slow_case_int); // bail out if failed
1883 
1884   // If the biasing toward our thread failed, this means that
1885   // another thread succeeded in biasing it toward itself and we
1886   // need to revoke that bias. The revocation will occur in the
1887   // interpreter runtime in the slow case.
1888   if (PrintBiasedLockingStatistics) {
1889     load_const(temp_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp2_reg);
1890     lwz(temp2_reg, 0, temp_reg);
1891     addi(temp2_reg, temp2_reg, 1);
1892     stw(temp2_reg, 0, temp_reg);
1893   }
1894   b(done);
1895 
1896   bind(try_rebias);
1897   // At this point we know the epoch has expired, meaning that the
1898   // current "bias owner", if any, is actually invalid. Under these
1899   // circumstances _only_, we are allowed to use the current header's
1900   // value as the comparison value when doing the cas to acquire the
1901   // bias in the current epoch. In other words, we allow transfer of
1902   // the bias from one thread to another directly in this situation.
1903   andi(temp_reg, mark_reg, markOopDesc::age_mask_in_place);
1904   orr(temp_reg, R16_thread, temp_reg);
1905   load_klass(temp2_reg, obj_reg);
1906   ld(temp2_reg, in_bytes(Klass::prototype_header_offset()), temp2_reg);
1907   orr(temp_reg, temp_reg, temp2_reg);
1908 
1909   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1910 
1911   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
1912   fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
1913   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1914                  /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1915                  /*where=*/obj_reg,
1916                  MacroAssembler::MemBarAcq,
1917                  MacroAssembler::cmpxchgx_hint_acquire_lock(),
1918                  noreg, slow_case_int); // bail out if failed
1919 
1920   // If the biasing toward our thread failed, this means that
1921   // another thread succeeded in biasing it toward itself and we
1922   // need to revoke that bias. The revocation will occur in the
1923   // interpreter runtime in the slow case.
1924   if (PrintBiasedLockingStatistics) {
1925     load_const(temp_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp2_reg);
1926     lwz(temp2_reg, 0, temp_reg);
1927     addi(temp2_reg, temp2_reg, 1);
1928     stw(temp2_reg, 0, temp_reg);
1929   }
1930   b(done);
1931 
1932   bind(try_revoke_bias);
1933   // The prototype mark in the klass doesn't have the bias bit set any
1934   // more, indicating that objects of this data type are not supposed
1935   // to be biased any more. We are going to try to reset the mark of
1936   // this object to the prototype value and fall through to the
1937   // CAS-based locking scheme. Note that if our CAS fails, it means
1938   // that another thread raced us for the privilege of revoking the
1939   // bias of this particular object, so it's okay to continue in the
1940   // normal locking code.
1941   load_klass(temp_reg, obj_reg);
1942   ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg);
1943   andi(temp2_reg, mark_reg, markOopDesc::age_mask_in_place);
1944   orr(temp_reg, temp_reg, temp2_reg);
1945 
1946   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1947 
1948   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
1949   fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
1950   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1951                  /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1952                  /*where=*/obj_reg,
1953                  MacroAssembler::MemBarAcq,
1954                  MacroAssembler::cmpxchgx_hint_acquire_lock());
1955 
1956   // reload markOop in mark_reg before continuing with lightweight locking
1957   ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
1958 
1959   // Fall through to the normal CAS-based lock, because no matter what
1960   // the result of the above CAS, some thread must have succeeded in
1961   // removing the bias bit from the object's header.
1962   if (PrintBiasedLockingStatistics) {
1963     Label l;
1964     bne(cr_reg, l);
1965     load_const(temp_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp2_reg);
1966     lwz(temp2_reg, 0, temp_reg);
1967     addi(temp2_reg, temp2_reg, 1);
1968     stw(temp2_reg, 0, temp_reg);
1969     bind(l);
1970   }
1971 
1972   bind(cas_label);
1973 }
1974 
1975 void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done) {
1976   // Check for biased locking unlock case, which is a no-op
1977   // Note: we do not have to check the thread ID for two reasons.
1978   // First, the interpreter checks for IllegalMonitorStateException at
1979   // a higher level. Second, if the bias was revoked while we held the
1980   // lock, the object could not be rebiased toward another thread, so
1981   // the bias bit would be clear.
1982 
1983   ld(temp_reg, 0, mark_addr);
1984   andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
1985 
1986   cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern);
1987   beq(cr_reg, done);
1988 }
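
An illustrative sketch of the unlock test above; the constants are the usual markOop values (biased_lock_mask_in_place == 0x7, biased_lock_pattern == 0x5) and are stated here only as assumptions:

#include <cstdint>

static bool biased_unlock_is_noop(intptr_t mark) {
  const intptr_t biased_lock_mask    = 0x7;  // andi(temp_reg, temp_reg, mask)
  const intptr_t biased_lock_pattern = 0x5;  // cmpwi(cr_reg, temp_reg, pattern)
  return (mark & biased_lock_mask) == biased_lock_pattern;  // beq(cr_reg, done)
}
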
1989 
1990 // "The box" is the space on the stack where we copy the object mark.
1991 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
1992                                                Register temp, Register displaced_header, Register current_header) {





1993   assert_different_registers(oop, box, temp, displaced_header, current_header);
1994   assert(flag != CCR0, "bad condition register");
1995   Label cont;
1996   Label object_has_monitor;
1997   Label cas_failed;
1998 
1999   // Load markOop from object into displaced_header.
2000   ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
2001 
2002 
2003   // Always do locking in runtime.
2004   if (EmitSync & 0x01) {
2005     cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
2006     return;
2007   }
2008 
2009   if (UseBiasedLocking) {
2010     biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
2011   }
2012 
2013   // Handle existing monitor.
2014   if ((EmitSync & 0x02) == 0) {
2015     // The object has an existing monitor iff (mark & monitor_value) != 0.
2016     andi_(temp, displaced_header, markOopDesc::monitor_value);
2017     bne(CCR0, object_has_monitor);
2018   }
2019 
2020   // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
2021   ori(displaced_header, displaced_header, markOopDesc::unlocked_value);
2022 
2023   // Load Compare Value application register.
2024 
2025   // Initialize the box. (Must happen before we update the object mark!)
2026   std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2027 
2028   // Must fence, otherwise, preceding store(s) may float below cmpxchg.
2029   // Compare object markOop with mark and if equal exchange scratch1 with object markOop.
2030   // CmpxchgX sets cr_reg to cmpX(current, displaced).
2031   membar(Assembler::StoreStore);
2032   cmpxchgd(/*flag=*/flag,


2049 
2050   // Check if the owner is self by comparing the value in the markOop of object
2051   // (current_header) with the stack pointer.
2052   sub(current_header, current_header, R1_SP);
2053   load_const_optimized(temp, (address) (~(os::vm_page_size()-1) |
2054                                         markOopDesc::lock_mask_in_place));
2055 
2056   and_(R0/*==0?*/, current_header, temp);
2057   // If the condition is true we are done ('cont') and hence we can store 0 as the
2058   // displaced header in the box, which indicates that it is a recursive lock.
2059   mcrf(flag,CCR0);
2060   std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
2061 
2062   // Handle existing monitor.
2063   if ((EmitSync & 0x02) == 0) {
2064     b(cont);
2065 
2066     bind(object_has_monitor);
2067     // The object's monitor m is unlocked iff m->owner == NULL,
2068     // otherwise m->owner may contain a thread or a stack address.
2069     //
2070     // Try to CAS m->owner from NULL to current thread.
2071     addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
2072     li(displaced_header, 0);
2073     // CmpxchgX sets flag to cmpX(current, displaced).
2074     cmpxchgd(/*flag=*/flag,
2075              /*current_value=*/current_header,
2076              /*compare_value=*/displaced_header,
2077              /*exchange_value=*/R16_thread,
2078              /*where=*/temp,
2079              MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2080              MacroAssembler::cmpxchgx_hint_acquire_lock());
2081 
2082     // Store a non-null value into the box.
2083     std(box, BasicLock::displaced_header_offset_in_bytes(), box);
2084 
2085 #   ifdef ASSERT
2086     bne(flag, cont);
2087     // We have acquired the monitor, check some invariants.
2088     addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
2089     // Invariant 1: _recursions should be 0.
2090     //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
2091     asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
2092                             "monitor->_recursions should be 0", -1);
2093     // Invariant 2: OwnerIsThread shouldn't be 0.
2094     //assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
2095     //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
2096     //                           "monitor->OwnerIsThread shouldn't be 0", -1);
2097 #   endif




2098   }
2099 
2100   bind(cont);
2101   // flag == EQ indicates success
2102   // flag == NE indicates failure
2103 }
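
Reviewer note on the recursion check between the CAS and bind(cont) above: after a failed CAS, current_header holds the object's mark word; the lock is a recursive stack lock iff (mark - SP) lies within one page and the low lock bits are clear. A minimal sketch, with page_size and lock_mask passed as parameters for illustration:

#include <cstdint>

static bool is_recursive_stack_lock(intptr_t mark, intptr_t sp,
                                    intptr_t page_size, intptr_t lock_mask) {
  intptr_t diff = mark - sp;                     // sub(current_header, current_header, R1_SP)
  intptr_t mask = ~(page_size - 1) | lock_mask;  // load_const_optimized(temp, ...)
  return (diff & mask) == 0;                     // and_(R0, current_header, temp)
}
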
2104 
2105 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
2106                                                  Register temp, Register displaced_header, Register current_header) {

2107   assert_different_registers(oop, box, temp, displaced_header, current_header);
2108   assert(flag != CCR0, "bad condition register");
2109   Label cont;
2110   Label object_has_monitor;
2111 
2112   // Always do locking in runtime.
2113   if (EmitSync & 0x01) {
2114     cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
2115     return;
2116   }
2117 
2118   if (UseBiasedLocking) {
2119     biased_locking_exit(flag, oop, current_header, cont);
2120   }
2121 
2122   // Find the lock address and load the displaced header from the stack.
2123   ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2124 
2125   // If the displaced header is 0, we have a recursive unlock.
2126   cmpdi(flag, displaced_header, 0);
2127   beq(flag, cont);
2128 
2129   // Handle existing monitor.
2130   if ((EmitSync & 0x02) == 0) {
2131     // The object has an existing monitor iff (mark & monitor_value) != 0.

2132     ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
2133     andi(temp, current_header, markOopDesc::monitor_value);
2134     cmpdi(flag, temp, 0);
2135     bne(flag, object_has_monitor);
2136   }
2137 
2138 
2139   // Check if it is still a lightweight lock; this is true if we see
2140   // the stack address of the basicLock in the markOop of the object.
2141   // Cmpxchg sets flag to cmpd(current_header, box).
2142   cmpxchgd(/*flag=*/flag,
2143            /*current_value=*/current_header,
2144            /*compare_value=*/box,
2145            /*exchange_value=*/displaced_header,
2146            /*where=*/oop,
2147            MacroAssembler::MemBarRel,
2148            MacroAssembler::cmpxchgx_hint_release_lock(),
2149            noreg,
2150            &cont);
2151 
2152   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
2153 
2154   // Handle existing monitor.
2155   if ((EmitSync & 0x02) == 0) {
2156     b(cont);
2157 
2158     bind(object_has_monitor);
2159     addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
2160     ld(temp,             ObjectMonitor::owner_offset_in_bytes(), current_header);
2161     ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
2162     xorr(temp, R16_thread, temp);      // Will be 0 if we are the owner.
2163     orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
2164     cmpdi(flag, temp, 0);
2165     bne(flag, cont);
2166 
2167     ld(temp,             ObjectMonitor::EntryList_offset_in_bytes(), current_header);
2168     ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
2169     orr(temp, temp, displaced_header); // Will be 0 if both are 0.
2170     cmpdi(flag, temp, 0);
2171     bne(flag, cont);
2172     release();
2173     std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
2174   }
2175 
2176   bind(cont);
2177   // flag == EQ indicates success
2178   // flag == NE indicates failure
2179 }
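
A sketch of the conditions checked in the object_has_monitor path above before the owner field is released; the struct is an illustrative stand-in for the ObjectMonitor fields read with ld():

#include <cstdint>

struct MonitorSketch { void* owner; intptr_t recursions; void* EntryList; void* cxq; };

static bool can_fast_exit(const MonitorSketch* m, void* self) {
  if (m->owner != self || m->recursions != 0) return false;       // xorr/orr/cmpdi -> bne(cont)
  if (m->EntryList != nullptr || m->cxq != nullptr) return false;  // orr/cmpdi -> bne(cont)
  return true;                                                     // release(); std(0, owner)
}
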
2180 


2424   // FIXME: assert that we really have a TOP_IJAVA_FRAME here!
2425 #ifdef CC_INTERP
2426   ld(tmp1/*pc*/, _top_ijava_frame_abi(frame_manager_lr), sp);
2427 #else
2428   address entry = pc();
2429   load_const_optimized(tmp1, entry);
2430 #endif
2431 
2432   set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1);
2433 }
2434 
2435 void MacroAssembler::get_vm_result(Register oop_result) {
2436   // Read:
2437   //   R16_thread
2438   //   R16_thread->in_bytes(JavaThread::vm_result_offset())
2439   //
2440   // Updated:
2441   //   oop_result
2442   //   R16_thread->in_bytes(JavaThread::vm_result_offset())
2443 


2444   ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
2445   li(R0, 0);
2446   std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
2447 
2448   verify_oop(oop_result);
2449 }
2450 
2451 void MacroAssembler::get_vm_result_2(Register metadata_result) {
2452   // Read:
2453   //   R16_thread
2454   //   R16_thread->in_bytes(JavaThread::vm_result_2_offset())
2455   //
2456   // Updated:
2457   //   metadata_result
2458   //   R16_thread->in_bytes(JavaThread::vm_result_2_offset())
2459 
2460   ld(metadata_result, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
2461   li(R0, 0);
2462   std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
2463 }
2464 
2465 
2466 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
2467   Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
2468   if (Universe::narrow_klass_base() != 0) {
2469     // Use dst as temp if it is free.
2470     load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg);
2471     sub(dst, current, R0);
2472     current = dst;
2473   }
2474   if (Universe::narrow_klass_shift() != 0) {
2475     srdi(dst, current, Universe::narrow_klass_shift());
2476     current = dst;
2477   }
2478   mr_if_needed(dst, current); // Move may be required.
2479 }
2480 
2481 void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
2482   if (UseCompressedClassPointers) {
2483     encode_klass_not_null(ck, klass);
2484     stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop);
2485   } else {
2486     std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
2487   }
2488 }
2489 
2490 void MacroAssembler::store_klass_gap(Register dst_oop, Register val) {
2491   if (UseCompressedClassPointers) {
2492     if (val == noreg) {
2493       val = R0;
2494       li(val, 0);
2495     }
2496     stw(val, oopDesc::klass_gap_offset_in_bytes(), dst_oop); // klass gap if compressed
2497   }
2498 }
2499 
2500 int MacroAssembler::instr_size_for_decode_klass_not_null() {
2501   if (!UseCompressedClassPointers) return 0;
2502   int num_instrs = 1;  // shift or move
2503   if (Universe::narrow_klass_base() != 0) num_instrs = 7;  // shift + load const + add
2504   return num_instrs * BytesPerInstWord;
2505 }
2506 
2507 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
2508   assert(dst != R0, "Dst reg may not be R0, as R0 is used here.");
2509   if (src == noreg) src = dst;
2510   Register shifted_src = src;
2511   if (Universe::narrow_klass_shift() != 0 ||
2512       Universe::narrow_klass_base() == 0 && src != dst) {  // Move required.
2513     shifted_src = dst;
2514     sldi(shifted_src, src, Universe::narrow_klass_shift());
2515   }
2516   if (Universe::narrow_klass_base() != 0) {
2517     load_const(R0, Universe::narrow_klass_base());
2518     add(dst, shifted_src, R0);
2519   }
2520 }
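
Reviewer note: the arithmetic implemented by encode_klass_not_null/decode_klass_not_null above, sketched in plain C++; base and shift stand for Universe::narrow_klass_base()/narrow_klass_shift() and are passed as parameters here only for illustration:

#include <cstdint>

static uint32_t encode_klass_sketch(uintptr_t klass, uintptr_t base, int shift) {
  return (uint32_t)((klass - base) >> shift);  // sub(dst, current, R0); srdi(dst, current, shift)
}

static uintptr_t decode_klass_sketch(uint32_t narrow, uintptr_t base, int shift) {
  return ((uintptr_t)narrow << shift) + base;  // sldi(shifted_src, src, shift); add(dst, shifted_src, R0)
}
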
2521 
2522 void MacroAssembler::load_klass(Register dst, Register src) {
2523   if (UseCompressedClassPointers) {
2524     lwz(dst, oopDesc::klass_offset_in_bytes(), src);
2525     // Attention: no null check here!
2526     decode_klass_not_null(dst, dst);
2527   } else {
2528     ld(dst, oopDesc::klass_offset_in_bytes(), src);
2529   }
2530 }
2531 
2532 void MacroAssembler::load_klass_with_trap_null_check(Register dst, Register src) {
2533   if (!os::zero_page_read_protected()) {
2534     if (TrapBasedNullChecks) {
2535       trap_null_check(src);
2536     }
2537   }
2538   load_klass(dst, src);


   1 /*
   2  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright 2012, 2015 SAP AG. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *


1438 }
1439 
1440 // Performs an atomic compare-exchange:
1441 //   if (compare_value == *addr_base)
1442 //     *addr_base = exchange_value
1443 //     int_flag_success = 1;
1444 //   else
1445 //     int_flag_success = 0;
1446 //
1447 // ConditionRegister flag       = cmp(compare_value, *addr_base)
1448 // Register dest_current_value  = *addr_base
1449 // Register compare_value       Used to compare with value in memory
1450 // Register exchange_value      Written to memory if compare_value == *addr_base
1451 // Register addr_base           The memory location to compareXChange
1452 // Register int_flag_success    Set to 1 if exchange_value was written to *addr_base
1453 //
1454 // To avoid the costly compare-exchange, the value is tested beforehand (contention hint).
1455 // Several special cases exist to avoid generating unnecessary code.
1456 //
1457 void MacroAssembler::cmpxchgd(ConditionRegister flag,
1458                               Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
1459                               Register addr_base, int semantics, bool cmpxchgx_hint,
1460                               Register int_flag_success, Label* failed_ext, bool contention_hint) {
1461   Label retry;
1462   Label failed_int;
1463   Label& failed = (failed_ext != NULL) ? *failed_ext : failed_int;
1464   Label done;
1465 
1466   // Save one branch if result is returned via register and result register is different from the other ones.
1467   bool use_result_reg    = (int_flag_success!=noreg);
1468   bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() &&
1469                             int_flag_success!=exchange_value && int_flag_success!=addr_base);
1470   assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
1471 
1472   // release/fence semantics
1473   if (semantics & MemBarRel) {
1474     release();
1475   }
1476 
1477   if (use_result_reg && preset_result_reg) {
1478     li(int_flag_success, 0); // preset (assume cas failed)
1479   }
1480 
1481   // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
1482   if (contention_hint) { // Don't try to reserve if cmp fails.
1483     ld(dest_current_value, 0, addr_base);
1484     cmpd(flag, compare_value, dest_current_value);
1485     bne(flag, failed);
1486   }
1487 
1488   // atomic emulation loop
1489   bind(retry);
1490 
1491   ldarx(dest_current_value, addr_base, cmpxchgx_hint);
1492   cmpd(flag, compare_value, dest_current_value);
1493   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1494     bne_predict_not_taken(flag, failed);
1495   } else {
1496     bne(                  flag, failed);
1497   }
1498 
1499   stdcx_(exchange_value, addr_base);
1500   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1501     bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
1502   } else {
1503     bne(                  CCR0, retry); // stXcx_ sets CCR0
1504   }
1505 
1506   // result in register (must do this at the end because int_flag_success can be the same register as one above)
1507   if (use_result_reg) {
1508     li(int_flag_success, 1);
1509   }
1510 
1511   // POWER6 doesn't need isync in CAS.
1512   // Always emit isync to be on the safe side.


1856   // rotate epoch bits to right (little) end and set other bits to 0
1857   // [ big part | epoch | little part ] -> [ 0..0 | epoch ]
1858   rldicl_(temp2_reg, temp_reg, shift_amount, 64 - markOopDesc::epoch_bits);
1859   // branch if epoch bits are != 0, i.e. they differ, because the epoch has been incremented
1860   bne(CCR0, try_rebias);
1861 
1862   // The epoch of the current bias is still valid but we know nothing
1863   // about the owner; it might be set or it might be clear. Try to
1864   // acquire the bias of the object using an atomic operation. If this
1865   // fails we will go in to the runtime to revoke the object's bias.
1866   // Note that we first construct the presumed unbiased header so we
1867   // don't accidentally blow away another thread's valid bias.
1868   andi(mark_reg, mark_reg, (markOopDesc::biased_lock_mask_in_place |
1869                                 markOopDesc::age_mask_in_place |
1870                                 markOopDesc::epoch_mask_in_place));
1871   orr(temp_reg, R16_thread, mark_reg);
1872 
1873   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1874 
1875   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).

1876   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1877            /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1878            /*where=*/obj_reg,
1879            MacroAssembler::MemBarAcq,
1880            MacroAssembler::cmpxchgx_hint_acquire_lock(),
1881            noreg, slow_case_int); // bail out if failed
1882 
1883   // If the biasing toward our thread failed, this means that
1884   // another thread succeeded in biasing it toward itself and we
1885   // need to revoke that bias. The revocation will occur in the
1886   // interpreter runtime in the slow case.
1887   if (PrintBiasedLockingStatistics) {
1888     load_const(temp_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp2_reg);
1889     lwz(temp2_reg, 0, temp_reg);
1890     addi(temp2_reg, temp2_reg, 1);
1891     stw(temp2_reg, 0, temp_reg);
1892   }
1893   b(done);
1894 
1895   bind(try_rebias);
1896   // At this point we know the epoch has expired, meaning that the
1897   // current "bias owner", if any, is actually invalid. Under these
1898   // circumstances _only_, we are allowed to use the current header's
1899   // value as the comparison value when doing the cas to acquire the
1900   // bias in the current epoch. In other words, we allow transfer of
1901   // the bias from one thread to another directly in this situation.
1902   andi(temp_reg, mark_reg, markOopDesc::age_mask_in_place);
1903   orr(temp_reg, R16_thread, temp_reg);
1904   load_klass(temp2_reg, obj_reg);
1905   ld(temp2_reg, in_bytes(Klass::prototype_header_offset()), temp2_reg);
1906   orr(temp_reg, temp_reg, temp2_reg);
1907 
1908   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1909 
1910   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).

1911   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1912                  /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1913                  /*where=*/obj_reg,
1914                  MacroAssembler::MemBarAcq,
1915                  MacroAssembler::cmpxchgx_hint_acquire_lock(),
1916                  noreg, slow_case_int); // bail out if failed
1917 
1918   // If the biasing toward our thread failed, this means that
1919   // another thread succeeded in biasing it toward itself and we
1920   // need to revoke that bias. The revocation will occur in the
1921   // interpreter runtime in the slow case.
1922   if (PrintBiasedLockingStatistics) {
1923     load_const(temp_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp2_reg);
1924     lwz(temp2_reg, 0, temp_reg);
1925     addi(temp2_reg, temp2_reg, 1);
1926     stw(temp2_reg, 0, temp_reg);
1927   }
1928   b(done);
1929 
1930   bind(try_revoke_bias);
1931   // The prototype mark in the klass doesn't have the bias bit set any
1932   // more, indicating that objects of this data type are not supposed
1933   // to be biased any more. We are going to try to reset the mark of
1934   // this object to the prototype value and fall through to the
1935   // CAS-based locking scheme. Note that if our CAS fails, it means
1936   // that another thread raced us for the privilege of revoking the
1937   // bias of this particular object, so it's okay to continue in the
1938   // normal locking code.
1939   load_klass(temp_reg, obj_reg);
1940   ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg);
1941   andi(temp2_reg, mark_reg, markOopDesc::age_mask_in_place);
1942   orr(temp_reg, temp_reg, temp2_reg);
1943 
1944   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1945 
1946   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).

1947   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1948                  /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1949                  /*where=*/obj_reg,
1950                  MacroAssembler::MemBarAcq,
1951                  MacroAssembler::cmpxchgx_hint_acquire_lock());
1952 
1953   // reload markOop in mark_reg before continuing with lightweight locking
1954   ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
1955 
1956   // Fall through to the normal CAS-based lock, because no matter what
1957   // the result of the above CAS, some thread must have succeeded in
1958   // removing the bias bit from the object's header.
1959   if (PrintBiasedLockingStatistics) {
1960     Label l;
1961     bne(cr_reg, l);
1962     load_const(temp_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp2_reg);
1963     lwz(temp2_reg, 0, temp_reg);
1964     addi(temp2_reg, temp2_reg, 1);
1965     stw(temp2_reg, 0, temp_reg);
1966     bind(l);
1967   }
1968 
1969   bind(cas_label);
1970 }
1971 
1972 void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done) {
1973   // Check for biased locking unlock case, which is a no-op
1974   // Note: we do not have to check the thread ID for two reasons.
1975   // First, the interpreter checks for IllegalMonitorStateException at
1976   // a higher level. Second, if the bias was revoked while we held the
1977   // lock, the object could not be rebiased toward another thread, so
1978   // the bias bit would be clear.
1979 
1980   ld(temp_reg, 0, mark_addr);
1981   andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
1982 
1983   cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern);
1984   beq(cr_reg, done);
1985 }
1986 
1987 // TM on PPC64.
1988 void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
1989   Label retry;
1990   bind(retry);
1991   ldarx(result, addr, /*hint*/ false);
1992   addi(result, result, simm16);
1993   stdcx_(result, addr);
1994   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1995     bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
1996   } else {
1997     bne(                  CCR0, retry); // stXcx_ sets CCR0
1998   }
1999 }
2000 
2001 void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) {
2002   Label retry;
2003   bind(retry);
2004   lwarx(result, addr, /*hint*/ false);
2005   ori(result, result, uimm16);
2006   stwcx_(result, addr);
2007   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
2008     bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
2009   } else {
2010     bne(                  CCR0, retry); // stXcx_ sets CCR0
2011   }
2012 }
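
Reviewer note: atomic_ori_int's lwarx/ori/stwcx_ loop performs an atomic fetch-or on a 32-bit word. A minimal sketch of what it computes; the helper name and std::atomic are illustrative only:

#include <atomic>
#include <cstdint>

static uint32_t atomic_ori_int_sketch(std::atomic<uint32_t>* addr, uint32_t uimm16) {
  // fetch_or returns the old value; 'result' in the assembly ends up holding old | uimm16.
  return addr->fetch_or(uimm16) | uimm16;
}
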
2013 
2014 #if INCLUDE_RTM_OPT
2015 
2016 // Update rtm_counters based on abort status
2017 // input: abort_status
2018 //        rtm_counters (RTMLockingCounters*)
2019 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
2020   // Mapping to keep PreciseRTMLockingStatistics similar to x86.
2021   // x86 ppc (! means inverted, ? means not the same)
2022   //  0   31  Set if abort caused by XABORT instruction.
2023   //  1  ! 7  If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
2024   //  2   13  Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
2025   //  3   10  Set if an internal buffer overflowed.
2026   //  4  ?12  Set if a debug breakpoint was hit.
2027   //  5  ?32  Set if an abort occurred during execution of a nested transaction.
2028   const  int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too.
2029                                  Assembler::tm_failure_persistent, // inverted: transient
2030                                  Assembler::tm_trans_cf,
2031                                  Assembler::tm_footprint_of,
2032                                  Assembler::tm_non_trans_cf,
2033                                  Assembler::tm_suspended};
2034   const bool tm_failure_inv[] = {false, true, false, false, false, false};
2035   assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!");
2036 
2037   const Register addr_Reg = R0;
2038   // Keep track of offset to where rtm_counters_Reg had pointed to.
2039   int counters_offs = RTMLockingCounters::abort_count_offset();
2040   addi(addr_Reg, rtm_counters_Reg, counters_offs);
2041   const Register temp_Reg = rtm_counters_Reg;
2042 
2043   //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
2044   ldx(temp_Reg, addr_Reg);
2045   addi(temp_Reg, temp_Reg, 1);
2046   stdx(temp_Reg, addr_Reg);
2047 
2048   if (PrintPreciseRTMLockingStatistics) {
2049     int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs;
2050 
2051     //mftexasr(abort_status); done by caller
2052     for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
2053       counters_offs += counters_offs_delta;
2054       li(temp_Reg, counters_offs_delta); // can't use addi with R0
2055       add(addr_Reg, addr_Reg, temp_Reg); // point to next counter
2056       counters_offs_delta = sizeof(uintx);
2057 
2058       Label check_abort;
2059       rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0);
2060       if (tm_failure_inv[i]) {
2061         bne(CCR0, check_abort);
2062       } else {
2063         beq(CCR0, check_abort);
2064       }
2065       //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
2066       ldx(temp_Reg, addr_Reg);
2067       addi(temp_Reg, temp_Reg, 1);
2068       stdx(temp_Reg, addr_Reg);
2069       bind(check_abort);
2070     }
2071   }
2072   li(temp_Reg, -counters_offs); // can't use addi with R0
2073   add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore
2074 }
2075 
2076 // Branch if (random & (count-1) != 0), count is 2^n
2077 // tmp and CR0 are killed
2078 void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
2079   mftb(tmp);
2080   andi_(tmp, tmp, count-1);
2081   bne(CCR0, brLabel);
2082 }
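
A sketch of the sampling predicate above: mftb reads the time base register and, with count a power of two, the counter increment is taken roughly once per 'count' calls. Illustrative only:

#include <cstdint>

static bool branch_on_random_sketch(uint64_t time_base, int count /* power of two */) {
  return (time_base & (uint64_t)(count - 1)) != 0;  // andi_; bne(CCR0, brLabel) -> skip increment
}
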
2083 
2084 // Perform abort ratio calculation, set no_rtm bit if high ratio.
2085 // input:  rtm_counters_Reg (RTMLockingCounters* address) - KILLED
2086 void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
2087                                                  RTMLockingCounters* rtm_counters,
2088                                                  Metadata* method_data) {
2089   Label L_done, L_check_always_rtm1, L_check_always_rtm2;
2090 
2091   if (RTMLockingCalculationDelay > 0) {
2092     // Delay calculation.
2093     ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr());
2094     cmpdi(CCR0, rtm_counters_Reg, 0);
2095     beq(CCR0, L_done);
2096     load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
2097   }
2098   // Abort ratio calculation only if abort_count > RTMAbortThreshold.
2099   //   Aborted transactions = abort_count * 100
2100   //   All transactions = total_count *  RTMTotalCountIncrRate
2101   //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
2102   ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
2103   cmpdi(CCR0, R0, RTMAbortThreshold);
2104   blt(CCR0, L_check_always_rtm2);
2105   mulli(R0, R0, 100);
2106 
2107   const Register tmpReg = rtm_counters_Reg;
2108   ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
2109   mulli(tmpReg, tmpReg, RTMTotalCountIncrRate);
2110   mulli(tmpReg, tmpReg, RTMAbortRatio);
2111   cmpd(CCR0, R0, tmpReg);
2112   blt(CCR0, L_check_always_rtm1); // jump to reload
2113   if (method_data != NULL) {
2114     // Set rtm_state to "no rtm" in MDO.
2115     // Not using a metadata relocation. Method and Class Loader are kept alive anyway.
2116     // (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
2117     load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
2118     atomic_ori_int(R0, tmpReg, NoRTM);
2119   }
2120   b(L_done);
2121 
2122   bind(L_check_always_rtm1);
2123   load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
2124   bind(L_check_always_rtm2);
2125   ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
2126   cmpdi(CCR0, tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
2127   blt(CCR0, L_done);
2128   if (method_data != NULL) {
2129     // Set rtm_state to "always rtm" in MDO.
2130     // Not using a metadata relocation. See above.
2131     load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
2132     atomic_ori_int(R0, tmpReg, UseRTM);
2133   }
2134   bind(L_done);
2135 }
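
Reviewer note: the abort-ratio decision emitted above, sketched as plain arithmetic (flag computation only, not the MDO update); parameter names mirror the -XX flags read by the emitted code:

#include <cstdint>

static bool should_set_no_rtm(uint64_t abort_count, uint64_t total_count,
                              uint64_t RTMTotalCountIncrRate, uint64_t RTMAbortRatio,
                              uint64_t RTMAbortThreshold) {
  if (abort_count < RTMAbortThreshold) return false;       // cmpdi/blt -> skip calculation
  uint64_t aborted = abort_count * 100;                     // mulli(R0, R0, 100)
  uint64_t all     = total_count * RTMTotalCountIncrRate;   // mulli(tmpReg, tmpReg, ...)
  return aborted >= all * RTMAbortRatio;                    // cmpd/blt -> keep RTM otherwise
}
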
2136 
2137 // Update counters and perform abort ratio calculation.
2138 // input: abort_status_Reg
2139 void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
2140                                    RTMLockingCounters* rtm_counters,
2141                                    Metadata* method_data,
2142                                    bool profile_rtm) {
2143 
2144   assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
2145   // Update rtm counters based on state at abort.
2146   // Reads abort_status_Reg, updates flags.
2147   assert_different_registers(abort_status_Reg, temp_Reg);
2148   load_const_optimized(temp_Reg, (address)rtm_counters, R0);
2149   rtm_counters_update(abort_status_Reg, temp_Reg);
2150   if (profile_rtm) {
2151     assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
2152     rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
2153   }
2154 }
2155 
2156 // Retry on abort if abort's status indicates non-persistent failure.
2157 // inputs: retry_count_Reg
2158 //       : abort_status_Reg
2159 // output: retry_count_Reg decremented by 1
2160 void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
2161                                              Label& retryLabel, Label* checkRetry) {
2162   Label doneRetry;
2163   rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0);
2164   bne(CCR0, doneRetry);
2165   if (checkRetry) { bind(*checkRetry); }
2166   addic_(retry_count_Reg, retry_count_Reg, -1);
2167   blt(CCR0, doneRetry);
2168   smt_yield(); // Can't use wait(). No permission (SIGILL).
2169   b(retryLabel);
2170   bind(doneRetry);
2171 }
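
A sketch of the retry policy above: retry only while the failure is transient (tm_failure_persistent clear) and the retry budget has not run out. Illustrative only:

static bool should_retry_on_abort(bool failure_persistent, int& retry_count) {
  if (failure_persistent) return false;  // rldicr_(R0, abort_status, ...); bne(CCR0, doneRetry)
  return --retry_count >= 0;             // addic_(retry_count_Reg, -1); blt(CCR0, doneRetry)
}
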
2172 
2173 // Spin and retry if lock is busy.
2174 // inputs: box_Reg (monitor address)
2175 //       : retry_count_Reg
2176 // output: retry_count_Reg decremented by 1
2177 // CTR is killed
2178 void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
2179   Label SpinLoop, doneRetry;
2180   addic_(retry_count_Reg, retry_count_Reg, -1);
2181   blt(CCR0, doneRetry);
2182   li(R0, RTMSpinLoopCount);
2183   mtctr(R0);
2184 
2185   bind(SpinLoop);
2186   smt_yield(); // Can't use waitrsv(). No permission (SIGILL).
2187   bdz(retryLabel);
2188   ld(R0, 0, owner_addr_Reg);
2189   cmpdi(CCR0, R0, 0);
2190   bne(CCR0, SpinLoop);
2191   b(retryLabel);
2192 
2193   bind(doneRetry);
2194 }
2195 
2196 // Use RTM for normal stack locks.
2197 // Input: objReg (object to lock)
2198 void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
2199                                        Register obj, Register mark_word, Register tmp,
2200                                        Register retry_on_abort_count_Reg,
2201                                        RTMLockingCounters* stack_rtm_counters,
2202                                        Metadata* method_data, bool profile_rtm,
2203                                        Label& DONE_LABEL, Label& IsInflated) {
2204   assert(UseRTMForStackLocks, "why call this otherwise?");
2205   assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
2206   Label L_rtm_retry, L_decrement_retry, L_on_abort;
2207 
2208   if (RTMRetryCount > 0) {
2209     load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
2210     bind(L_rtm_retry);
2211   }
2212   andi_(R0, mark_word, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
2213   bne(CCR0, IsInflated);
2214 
2215   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2216     Label L_noincrement;
2217     if (RTMTotalCountIncrRate > 1) {
2218       branch_on_random_using_tb(tmp, (int)RTMTotalCountIncrRate, L_noincrement);
2219     }
2220     assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
2221     load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
2222     //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
2223     ldx(mark_word, tmp);
2224     addi(mark_word, mark_word, 1);
2225     stdx(mark_word, tmp);
2226     bind(L_noincrement);
2227   }
2228   tbegin_();
2229   beq(CCR0, L_on_abort);
2230   ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);         // Reload in transaction, conflicts need to be tracked.
2231   andi(R0, mark_word, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
2232   cmpwi(flag, R0, markOopDesc::unlocked_value);                // bits = 001 unlocked
2233   beq(flag, DONE_LABEL);                                       // all done if unlocked
2234 
2235   if (UseRTMXendForLockBusy) {
2236     tend_();
2237     b(L_decrement_retry);
2238   } else {
2239     tabort_();
2240   }
2241   bind(L_on_abort);
2242   const Register abort_status_Reg = tmp;
2243   mftexasr(abort_status_Reg);
2244   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2245     rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
2246   }
2247   ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
2248   if (RTMRetryCount > 0) {
2249     // Retry on lock abort if abort status is not permanent.
2250     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
2251   } else {
2252     bind(L_decrement_retry);
2253   }
2254 }
2255 
2256 // Use RTM for inflating locks
2257 // inputs: obj       (object to lock)
2258 //         mark_word (current header - KILLED)
2259 //         boxReg    (on-stack box address (displaced header location) - KILLED)
2260 void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
2261                                           Register obj, Register mark_word, Register boxReg,
2262                                           Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
2263                                           RTMLockingCounters* rtm_counters,
2264                                           Metadata* method_data, bool profile_rtm,
2265                                           Label& DONE_LABEL) {
2266   assert(UseRTMLocking, "why call this otherwise?");
2267   Label L_rtm_retry, L_decrement_retry, L_on_abort;
2268   // Clean monitor_value bit to get valid pointer.
2269   int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
2270 
2271   // Store non-null, using boxReg instead of (intptr_t)markOopDesc::unused_mark().
2272   std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
2273   const Register tmpReg = boxReg;
2274   const Register owner_addr_Reg = mark_word;
2275   addi(owner_addr_Reg, mark_word, owner_offset);
2276 
2277   if (RTMRetryCount > 0) {
2278     load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy.
2279     load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
2280     bind(L_rtm_retry);
2281   }
2282   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2283     Label L_noincrement;
2284     if (RTMTotalCountIncrRate > 1) {
2285       branch_on_random_using_tb(R0, (int)RTMTotalCountIncrRate, L_noincrement);
2286     }
2287     assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
2288     load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
2289     //atomic_inc_ptr(R0, tmpReg); We don't increment atomically
2290     ldx(tmpReg, R0);
2291     addi(tmpReg, tmpReg, 1);
2292     stdx(tmpReg, R0);
2293     bind(L_noincrement);
2294   }
2295   tbegin_();
2296   beq(CCR0, L_on_abort);
2297   // We don't reload mark word. Will only be reset at safepoint.
2298   ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
2299   cmpdi(flag, R0, 0);
2300   beq(flag, DONE_LABEL);
2301 
2302   if (UseRTMXendForLockBusy) {
2303     tend_();
2304     b(L_decrement_retry);
2305   } else {
2306     tabort_();
2307   }
2308   bind(L_on_abort);
2309   const Register abort_status_Reg = tmpReg;
2310   mftexasr(abort_status_Reg);
2311   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2312     rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
2313     // Restore owner_addr_Reg
2314     ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
2315 #ifdef ASSERT
2316     andi_(R0, mark_word, markOopDesc::monitor_value);
2317     asm_assert_ne("must be inflated", 0xa754); // Deflating only allowed at safepoint.
2318 #endif
2319     addi(owner_addr_Reg, mark_word, owner_offset);
2320   }
2321   if (RTMRetryCount > 0) {
2322     // Retry on lock abort if abort status is not permanent.
2323     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
2324   }
2325 
2326   // Appears unlocked - try to swing _owner from null to non-null.
2327   cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
2328            MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2329            MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);
2330 
2331   if (RTMRetryCount > 0) {
2332     // success done else retry
2333     b(DONE_LABEL);
2334     bind(L_decrement_retry);
2335     // Spin and retry if lock is busy.
2336     rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
2337   } else {
2338     bind(L_decrement_retry);
2339   }
2340 }
2341 
2342 #endif //  INCLUDE_RTM_OPT
2343 
2344 // "The box" is the space on the stack where we copy the object mark.
2345 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
2346                                                Register temp, Register displaced_header, Register current_header,
2347                                                bool try_bias,
2348                                                RTMLockingCounters* rtm_counters,
2349                                                RTMLockingCounters* stack_rtm_counters,
2350                                                Metadata* method_data,
2351                                                bool use_rtm, bool profile_rtm) {
2352   assert_different_registers(oop, box, temp, displaced_header, current_header);
2353   assert(flag != CCR0, "bad condition register");
2354   Label cont;
2355   Label object_has_monitor;
2356   Label cas_failed;
2357 
2358   // Load markOop from object into displaced_header.
2359   ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
2360 
2361 
2362   // Always do locking in runtime.
2363   if (EmitSync & 0x01) {
2364     cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
2365     return;
2366   }
2367 
2368   if (try_bias) {
2369     biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
2370   }
2371 
2372 #if INCLUDE_RTM_OPT
2373   if (UseRTMForStackLocks && use_rtm) {
2374     rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
2375                       stack_rtm_counters, method_data, profile_rtm,
2376                       cont, object_has_monitor);
2377   }
2378 #endif // INCLUDE_RTM_OPT
2379 
2380   // Handle existing monitor.
2381   if ((EmitSync & 0x02) == 0) {
2382     // The object has an existing monitor iff (mark & monitor_value) != 0.
2383     andi_(temp, displaced_header, markOopDesc::monitor_value);
2384     bne(CCR0, object_has_monitor);
2385   }
2386 
2387   // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
2388   ori(displaced_header, displaced_header, markOopDesc::unlocked_value);
2389 
2390   // Load Compare Value application register.
2391 
2392   // Initialize the box. (Must happen before we update the object mark!)
2393   std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2394 
2395   // Must fence, otherwise, preceding store(s) may float below cmpxchg.
2396   // Compare object markOop with mark and if equal exchange scratch1 with object markOop.
2397   // CmpxchgX sets cr_reg to cmpX(current, displaced).
2398   membar(Assembler::StoreStore);
2399   cmpxchgd(/*flag=*/flag,


2416 
2417   // Check if the owner is self by comparing the value in the markOop of object
2418   // (current_header) with the stack pointer.
2419   sub(current_header, current_header, R1_SP);
2420   load_const_optimized(temp, (address) (~(os::vm_page_size()-1) |
2421                                         markOopDesc::lock_mask_in_place));
2422 
2423   and_(R0/*==0?*/, current_header, temp);
2424   // If the condition is true we are done ('cont') and hence we can store 0 as the
2425   // displaced header in the box, which indicates that it is a recursive lock.
2426   mcrf(flag,CCR0);
2427   std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
2428 
2429   // Handle existing monitor.
2430   if ((EmitSync & 0x02) == 0) {
2431     b(cont);
2432 
2433     bind(object_has_monitor);
2434     // The object's monitor m is unlocked iff m->owner == NULL,
2435     // otherwise m->owner may contain a thread or a stack address.
2436 
2437 #if INCLUDE_RTM_OPT
2438     // Use the same RTM locking code in 32- and 64-bit VM.
2439     if (use_rtm) {
2440       rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
2441                            rtm_counters, method_data, profile_rtm, cont);
2442     } else {
2443 #endif // INCLUDE_RTM_OPT
2444 
2445     // Try to CAS m->owner from NULL to current thread.
2446     addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
2447     li(displaced_header, 0);
2448     // CmpxchgX sets flag to cmpX(current, displaced).
2449     cmpxchgd(/*flag=*/flag,
2450              /*current_value=*/current_header,
2451              /*compare_value=*/(intptr_t)0,
2452              /*exchange_value=*/R16_thread,
2453              /*where=*/temp,
2454              MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2455              MacroAssembler::cmpxchgx_hint_acquire_lock());
2456 
2457     // Store a non-null value into the box.
2458     std(box, BasicLock::displaced_header_offset_in_bytes(), box);
2459 
2460 #   ifdef ASSERT
2461     bne(flag, cont);
2462     // We have acquired the monitor, check some invariants.
2463     addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
2464     // Invariant 1: _recursions should be 0.
2465     //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
2466     asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
2467                             "monitor->_recursions should be 0", -1);
2468     // Invariant 2: OwnerIsThread shouldn't be 0.
2469     //assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
2470     //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
2471     //                           "monitor->OwnerIsThread shouldn't be 0", -1);
2472 #   endif
2473 
2474 #if INCLUDE_RTM_OPT
2475     } // use_rtm()
2476 #endif
2477   }
2478 
2479   bind(cont);
2480   // flag == EQ indicates success
2481   // flag == NE indicates failure
2482 }
2483 
2484 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
2485                                                  Register temp, Register displaced_header, Register current_header,
2486                                                  bool try_bias, bool use_rtm) {
2487   assert_different_registers(oop, box, temp, displaced_header, current_header);
2488   assert(flag != CCR0, "bad condition register");
2489   Label cont;
2490   Label object_has_monitor;
2491 
2492   // Always do unlocking in the runtime.
2493   if (EmitSync & 0x01) {
2494     cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
2495     return;
2496   }
2497 
2498   if (try_bias) {
2499     biased_locking_exit(flag, oop, current_header, cont);
2500   }
2501 
2502 #if INCLUDE_RTM_OPT
2503   if (UseRTMForStackLocks && use_rtm) {
2504     assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
2505     Label L_regular_unlock;
2506     ld(current_header, oopDesc::mark_offset_in_bytes(), oop);         // fetch markword
2507     andi(R0, current_header, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
2508     cmpwi(flag, R0, markOopDesc::unlocked_value);                     // bits = 001 unlocked
2509     bne(flag, L_regular_unlock);                                      // else RegularLock
2510     tend_();                                                          // otherwise end...
2511     b(cont);                                                          // ... and we're done
2512     bind(L_regular_unlock);
2513   }
2514 #endif
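  // If the lock bits still read "unlocked", the stack lock was elided inside a transaction,
  // so tend_() above just commits it; any real lock word falls through to the regular path.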
2515 
2516   // Find the lock address and load the displaced header from the stack.
2517   ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2518 
2519   // If the displaced header is 0, we have a recursive unlock.
2520   cmpdi(flag, displaced_header, 0);
2521   beq(flag, cont);
2522 
2523   // Handle existing monitor.
2524   if ((EmitSync & 0x02) == 0) {
2525     // The object has an existing monitor iff (mark & monitor_value) != 0.
2526     RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
2527     ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
2528     andi_(R0, current_header, markOopDesc::monitor_value);
2529     bne(CCR0, object_has_monitor);

2530   }
2531 

2532   // Check if it is still a lightweight lock; this is true if we see
2533   // the stack address of the basicLock in the markOop of the object.
2534   // Cmpxchg sets flag to cmpd(current_header, box).
2535   cmpxchgd(/*flag=*/flag,
2536            /*current_value=*/current_header,
2537            /*compare_value=*/box,
2538            /*exchange_value=*/displaced_header,
2539            /*where=*/oop,
2540            MacroAssembler::MemBarRel,
2541            MacroAssembler::cmpxchgx_hint_release_lock(),
2542            noreg,
2543            &cont);
2544 
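  // Roughly, the lightweight unlock above does (illustrative pseudocode only):
  //   if (obj->mark() == box) { obj->mark() = displaced_header; /* flag := EQ */ }
  //   else                    { /* flag := NE, the caller calls into the runtime */ }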
2545   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
2546 
2547   // Handle existing monitor.
2548   if ((EmitSync & 0x02) == 0) {
2549     b(cont);
2550 
2551     bind(object_has_monitor);
2552     addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
2553     ld(temp,             ObjectMonitor::owner_offset_in_bytes(), current_header);
2554 
2555     // It's inflated.
2556 #if INCLUDE_RTM_OPT
2557     if (use_rtm) {
2558       Label L_regular_inflated_unlock;
2559       // If the owner is still NULL the lock was elided transactionally, so just commit.
2560       cmpdi(flag, temp, 0);
2561       bne(flag, L_regular_inflated_unlock);
2562       tend_();
2563       b(cont);
2564       bind(L_regular_inflated_unlock);
2565     }
2566 #endif
2567 
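    // The fast inflated exit below is roughly (illustrative pseudocode only):
    //   if (m->_owner != current_thread || m->_recursions != 0) fail;  // flag := NE
    //   if (m->_EntryList != NULL || m->_cxq != NULL)           fail;  // waiters present
    //   release_store(&m->_owner, NULL);                               // flag stays EQ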
2568     ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
2569     xorr(temp, R16_thread, temp);      // Will be 0 if we are the owner.
2570     orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
2571     cmpdi(flag, temp, 0);
2572     bne(flag, cont);
2573 
2574     ld(temp,             ObjectMonitor::EntryList_offset_in_bytes(), current_header);
2575     ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
2576     orr(temp, temp, displaced_header); // Will be 0 if both are 0.
2577     cmpdi(flag, temp, 0);
2578     bne(flag, cont);
2579     release();
2580     std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
2581   }
2582 
2583   bind(cont);
2584   // flag == EQ indicates success
2585   // flag == NE indicates failure
2586 }
2587 


2831   // FIXME: assert that we really have a TOP_IJAVA_FRAME here!
2832 #ifdef CC_INTERP
2833   ld(tmp1/*pc*/, _top_ijava_frame_abi(frame_manager_lr), sp);
2834 #else
2835   address entry = pc();
2836   load_const_optimized(tmp1, entry);
2837 #endif
2838 
2839   set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1);
2840 }
2841 
2842 void MacroAssembler::get_vm_result(Register oop_result) {
2843   // Read:
2844   //   R16_thread
2845   //   R16_thread->in_bytes(JavaThread::vm_result_offset())
2846   //
2847   // Updated:
2848   //   oop_result
2849   //   R16_thread->in_bytes(JavaThread::vm_result_offset())
2850 
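  // Conceptually (illustrative only): oop_result = thread->_vm_result; thread->_vm_result = NULL;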
2851   verify_thread();
2852 
2853   ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
2854   li(R0, 0);
2855   std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
2856 
2857   verify_oop(oop_result);
2858 }
2859 
2860 void MacroAssembler::get_vm_result_2(Register metadata_result) {
2861   // Read:
2862   //   R16_thread
2863   //   R16_thread->in_bytes(JavaThread::vm_result_2_offset())
2864   //
2865   // Updated:
2866   //   metadata_result
2867   //   R16_thread->in_bytes(JavaThread::vm_result_2_offset())
2868 
2869   ld(metadata_result, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
2870   li(R0, 0);
2871   std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
2872 }
2873 
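// Compressed class pointer encoding, sketched as pseudocode (mirrors the
// Universe::narrow_klass_base()/narrow_klass_shift() usage below):
//   narrow = (uintptr_t(klass) - narrow_klass_base) >> narrow_klass_shift;
// Only the subtract and/or shift that are actually needed get emitted.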
2874 Register MacroAssembler::encode_klass_not_null(Register dst, Register src) {

2875   Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
2876   if (Universe::narrow_klass_base() != 0) {
2877     // Use dst as temp if it is free.
2878     sub_const_optimized(dst, current, Universe::narrow_klass_base(), R0);

2879     current = dst;
2880   }
2881   if (Universe::narrow_klass_shift() != 0) {
2882     srdi(dst, current, Universe::narrow_klass_shift());
2883     current = dst;
2884   }
2885   return current;
2886 }
2887 
2888 void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
2889   if (UseCompressedClassPointers) {
2890     Register compressedKlass = encode_klass_not_null(ck, klass);
2891     stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop);
2892   } else {
2893     std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
2894   }
2895 }
2896 
2897 void MacroAssembler::store_klass_gap(Register dst_oop, Register val) {
2898   if (UseCompressedClassPointers) {
2899     if (val == noreg) {
2900       val = R0;
2901       li(val, 0);
2902     }
2903     stw(val, oopDesc::klass_gap_offset_in_bytes(), dst_oop); // klass gap if compressed
2904   }
2905 }
2906 
2907 int MacroAssembler::instr_size_for_decode_klass_not_null() {
2908   if (!UseCompressedClassPointers) return 0;
2909   int num_instrs = 1;  // shift or move
2910   if (Universe::narrow_klass_base() != 0) num_instrs = 7;  // shift + load const + add
2911   return num_instrs * BytesPerInstWord;
2912 }
2913 
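// Decoding is the inverse of the encoding above (illustrative pseudocode only):
//   klass = (Klass*)((uintptr_t(narrow) << narrow_klass_shift) + narrow_klass_base);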
2914 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
2915   assert(dst != R0, "Dst reg may not be R0, as R0 is used here.");
2916   if (src == noreg) src = dst;
2917   Register shifted_src = src;
2918   if (Universe::narrow_klass_shift() != 0 ||
2919       Universe::narrow_klass_base() == 0 && src != dst) {  // Move required.
2920     shifted_src = dst;
2921     sldi(shifted_src, src, Universe::narrow_klass_shift());
2922   }
2923   if (Universe::narrow_klass_base() != 0) {
2924     add_const_optimized(dst, shifted_src, Universe::narrow_klass_base(), R0);

2925   }
2926 }
2927 
2928 void MacroAssembler::load_klass(Register dst, Register src) {
2929   if (UseCompressedClassPointers) {
2930     lwz(dst, oopDesc::klass_offset_in_bytes(), src);
2931     // Attention: no null check here!
2932     decode_klass_not_null(dst, dst);
2933   } else {
2934     ld(dst, oopDesc::klass_offset_in_bytes(), src);
2935   }
2936 }
2937 
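// If the zero page is not read-protected, a load through a NULL oop does not fault, so an
// explicit trap-based null check is emitted first (when TrapBasedNullChecks is enabled).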
2938 void MacroAssembler::load_klass_with_trap_null_check(Register dst, Register src) {
2939   if (!os::zero_page_read_protected()) {
2940     if (TrapBasedNullChecks) {
2941       trap_null_check(src);
2942     }
2943   }
2944   load_klass(dst, src);

