1 /*
2 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
3 * Copyright 2012, 2014 SAP AG. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
1438 }
1439
1440 // Performs atomic compare exchange:
1441 // if (compare_value == *addr_base)
1442 // *addr_base = exchange_value
1443 // int_flag_success = 1;
1444 // else
1445 // int_flag_success = 0;
1446 //
1447 // ConditionRegister flag = cmp(compare_value, *addr_base)
1448 // Register dest_current_value = *addr_base
1449 // Register compare_value Used to compare with value in memory
1450 // Register exchange_value Written to memory if compare_value == *addr_base
1451 // Register addr_base The memory location to compareXChange
1452 // Register int_flag_success Set to 1 if exchange_value was written to *addr_base
1453 //
1454 // To avoid the costly compare exchange, the value is tested beforehand.
1455 // Several special cases exist to avoid generating unnecessary code.
1456 //
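// Illustrative call sketch (register choices and the atomic-update hint variant
// are hypothetical, not taken from a call site in this file): emit a CAS with
// acquire semantics and an int success flag in R5:
//   cmpxchgd(CCR0, /*dest_current_value=*/R3, /*compare_value=*/R4,
//            /*exchange_value=*/R6, /*addr_base=*/R7,
//            MacroAssembler::MemBarAcq,
//            MacroAssembler::cmpxchgx_hint_atomic_update(),
//            /*int_flag_success=*/R5);
//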
1457 void MacroAssembler::cmpxchgd(ConditionRegister flag,
1458 Register dest_current_value, Register compare_value, Register exchange_value,
1459 Register addr_base, int semantics, bool cmpxchgx_hint,
1460 Register int_flag_success, Label* failed_ext, bool contention_hint) {
1461 Label retry;
1462 Label failed_int;
1463 Label& failed = (failed_ext != NULL) ? *failed_ext : failed_int;
1464 Label done;
1465
1466 // Save one branch if result is returned via register and result register is different from the other ones.
1467 bool use_result_reg = (int_flag_success!=noreg);
1468 bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value &&
1469 int_flag_success!=exchange_value && int_flag_success!=addr_base);
1470 assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
1471
1472 // release/fence semantics
1473 if (semantics & MemBarRel) {
1474 release();
1475 }
1476
1477 if (use_result_reg && preset_result_reg) {
1478 li(int_flag_success, 0); // preset (assume cas failed)
1479 }
1480
1481 // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
1482 if (contention_hint) { // Don't try to reserve if cmp fails.
1483 ld(dest_current_value, 0, addr_base);
1484 cmpd(flag, dest_current_value, compare_value);
1485 bne(flag, failed);
1486 }
1487
1488 // atomic emulation loop
1489 bind(retry);
1490
1491 ldarx(dest_current_value, addr_base, cmpxchgx_hint);
1492 cmpd(flag, dest_current_value, compare_value);
1493 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1494 bne_predict_not_taken(flag, failed);
1495 } else {
1496 bne( flag, failed);
1497 }
1498
1499 stdcx_(exchange_value, addr_base);
1500 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1501 bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
1502 } else {
1503 bne( CCR0, retry); // stXcx_ sets CCR0
1504 }
1505
1506 // result in register (must do this at the end because int_flag_success can be the same register as one above)
1507 if (use_result_reg) {
1508 li(int_flag_success, 1);
1509 }
1510
1511 // POWER6 doesn't need isync in CAS, but we always emit isync
1512 // to be on the safe side.
1856 // rotate epoch bits to right (little) end and set other bits to 0
1857 // [ big part | epoch | little part ] -> [ 0..0 | epoch ]
1858 rldicl_(temp2_reg, temp_reg, shift_amount, 64 - markOopDesc::epoch_bits);
1859 // branch if epoch bits are != 0, i.e. they differ, because the epoch has been incremented
1860 bne(CCR0, try_rebias);
1861
1862 // The epoch of the current bias is still valid but we know nothing
1863 // about the owner; it might be set or it might be clear. Try to
1864 // acquire the bias of the object using an atomic operation. If this
1865 // fails we will go in to the runtime to revoke the object's bias.
1866 // Note that we first construct the presumed unbiased header so we
1867 // don't accidentally blow away another thread's valid bias.
1868 andi(mark_reg, mark_reg, (markOopDesc::biased_lock_mask_in_place |
1869 markOopDesc::age_mask_in_place |
1870 markOopDesc::epoch_mask_in_place));
1871 orr(temp_reg, R16_thread, mark_reg);
1872
1873 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1874
1875 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
1876 fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
1877 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1878 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1879 /*where=*/obj_reg,
1880 MacroAssembler::MemBarAcq,
1881 MacroAssembler::cmpxchgx_hint_acquire_lock(),
1882 noreg, slow_case_int); // bail out if failed
1883
1884 // If the biasing toward our thread failed, this means that
1885 // another thread succeeded in biasing it toward itself and we
1886 // need to revoke that bias. The revocation will occur in the
1887 // interpreter runtime in the slow case.
1888 if (PrintBiasedLockingStatistics) {
1889 load_const(temp_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp2_reg);
1890 lwz(temp2_reg, 0, temp_reg);
1891 addi(temp2_reg, temp2_reg, 1);
1892 stw(temp2_reg, 0, temp_reg);
1893 }
1894 b(done);
1895
1896 bind(try_rebias);
1897 // At this point we know the epoch has expired, meaning that the
1898 // current "bias owner", if any, is actually invalid. Under these
1899 // circumstances _only_, we are allowed to use the current header's
1900 // value as the comparison value when doing the cas to acquire the
1901 // bias in the current epoch. In other words, we allow transfer of
1902 // the bias from one thread to another directly in this situation.
1903 andi(temp_reg, mark_reg, markOopDesc::age_mask_in_place);
1904 orr(temp_reg, R16_thread, temp_reg);
1905 load_klass(temp2_reg, obj_reg);
1906 ld(temp2_reg, in_bytes(Klass::prototype_header_offset()), temp2_reg);
1907 orr(temp_reg, temp_reg, temp2_reg);
1908
1909 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1910
1911 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
1912 fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
1913 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1914 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1915 /*where=*/obj_reg,
1916 MacroAssembler::MemBarAcq,
1917 MacroAssembler::cmpxchgx_hint_acquire_lock(),
1918 noreg, slow_case_int); // bail out if failed
1919
1920 // If the biasing toward our thread failed, this means that
1921 // another thread succeeded in biasing it toward itself and we
1922 // need to revoke that bias. The revocation will occur in the
1923 // interpreter runtime in the slow case.
1924 if (PrintBiasedLockingStatistics) {
1925 load_const(temp_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp2_reg);
1926 lwz(temp2_reg, 0, temp_reg);
1927 addi(temp2_reg, temp2_reg, 1);
1928 stw(temp2_reg, 0, temp_reg);
1929 }
1930 b(done);
1931
1932 bind(try_revoke_bias);
1933 // The prototype mark in the klass doesn't have the bias bit set any
1934 // more, indicating that objects of this data type are not supposed
1935 // to be biased any more. We are going to try to reset the mark of
1936 // this object to the prototype value and fall through to the
1937 // CAS-based locking scheme. Note that if our CAS fails, it means
1938 // that another thread raced us for the privilege of revoking the
1939 // bias of this particular object, so it's okay to continue in the
1940 // normal locking code.
1941 load_klass(temp_reg, obj_reg);
1942 ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg);
1943 andi(temp2_reg, mark_reg, markOopDesc::age_mask_in_place);
1944 orr(temp_reg, temp_reg, temp2_reg);
1945
1946 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1947
1948 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
1949 fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
1950 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1951 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1952 /*where=*/obj_reg,
1953 MacroAssembler::MemBarAcq,
1954 MacroAssembler::cmpxchgx_hint_acquire_lock());
1955
1956 // reload markOop in mark_reg before continuing with lightweight locking
1957 ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
1958
1959 // Fall through to the normal CAS-based lock, because no matter what
1960 // the result of the above CAS, some thread must have succeeded in
1961 // removing the bias bit from the object's header.
1962 if (PrintBiasedLockingStatistics) {
1963 Label l;
1964 bne(cr_reg, l);
1965 load_const(temp_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp2_reg);
1966 lwz(temp2_reg, 0, temp_reg);
1967 addi(temp2_reg, temp2_reg, 1);
1968 stw(temp2_reg, 0, temp_reg);
1969 bind(l);
1970 }
1971
1972 bind(cas_label);
1973 }
1974
1975 void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done) {
1976 // Check for biased locking unlock case, which is a no-op
1977 // Note: we do not have to check the thread ID for two reasons.
1978 // First, the interpreter checks for IllegalMonitorStateException at
1979 // a higher level. Second, if the bias was revoked while we held the
1980 // lock, the object could not be rebiased toward another thread, so
1981 // the bias bit would be clear.
1982
1983 ld(temp_reg, 0, mark_addr);
1984 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
1985
1986 cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern);
1987 beq(cr_reg, done);
1988 }
1989
1990 // "The box" is the space on the stack where we copy the object mark.
1991 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
1992 Register temp, Register displaced_header, Register current_header) {
1993 assert_different_registers(oop, box, temp, displaced_header, current_header);
1994 assert(flag != CCR0, "bad condition register");
1995 Label cont;
1996 Label object_has_monitor;
1997 Label cas_failed;
1998
1999 // Load markOop from object into displaced_header.
2000 ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
2001
2002
2003 // Always do locking in runtime.
2004 if (EmitSync & 0x01) {
2005 cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
2006 return;
2007 }
2008
2009 if (UseBiasedLocking) {
2010 biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
2011 }
2012
2013 // Handle existing monitor.
2014 if ((EmitSync & 0x02) == 0) {
2015 // The object has an existing monitor iff (mark & monitor_value) != 0.
2016 andi_(temp, displaced_header, markOopDesc::monitor_value);
2017 bne(CCR0, object_has_monitor);
2018 }
2019
2020 // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
2021 ori(displaced_header, displaced_header, markOopDesc::unlocked_value);
2022
2023 // Load Compare Value application register.
2024
2025 // Initialize the box. (Must happen before we update the object mark!)
2026 std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2027
2028 // Must fence, otherwise, preceding store(s) may float below cmpxchg.
2029 // Compare object markOop with mark and if equal exchange scratch1 with object markOop.
2030 // CmpxchgX sets cr_reg to cmpX(current, displaced).
2031 membar(Assembler::StoreStore);
2032 cmpxchgd(/*flag=*/flag,
2049
2050 // Check if the owner is self by comparing the value in the markOop of object
2051 // (current_header) with the stack pointer.
2052 sub(current_header, current_header, R1_SP);
2053 load_const_optimized(temp, (address) (~(os::vm_page_size()-1) |
2054 markOopDesc::lock_mask_in_place));
2055
2056 and_(R0/*==0?*/, current_header, temp);
2057 // If the condition is true this is a recursive lock and we can store 0 as
2058 // the displaced header in the box, which indicates that it is a recursive lock.
2059 mcrf(flag,CCR0);
2060 std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
2061
2062 // Handle existing monitor.
2063 if ((EmitSync & 0x02) == 0) {
2064 b(cont);
2065
2066 bind(object_has_monitor);
2067 // The object's monitor m is unlocked iff m->owner == NULL,
2068 // otherwise m->owner may contain a thread or a stack address.
2069 //
2070 // Try to CAS m->owner from NULL to current thread.
2071 addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
2072 li(displaced_header, 0);
2073 // CmpxchgX sets flag to cmpX(current, displaced).
2074 cmpxchgd(/*flag=*/flag,
2075 /*current_value=*/current_header,
2076 /*compare_value=*/displaced_header,
2077 /*exchange_value=*/R16_thread,
2078 /*where=*/temp,
2079 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2080 MacroAssembler::cmpxchgx_hint_acquire_lock());
2081
2082 // Store a non-null value into the box.
2083 std(box, BasicLock::displaced_header_offset_in_bytes(), box);
2084
2085 # ifdef ASSERT
2086 bne(flag, cont);
2087 // We have acquired the monitor, check some invariants.
2088 addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
2089 // Invariant 1: _recursions should be 0.
2090 //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
2091 asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
2092 "monitor->_recursions should be 0", -1);
2093 // Invariant 2: OwnerIsThread shouldn't be 0.
2094 //assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
2095 //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
2096 // "monitor->OwnerIsThread shouldn't be 0", -1);
2097 # endif
2098 }
2099
2100 bind(cont);
2101 // flag == EQ indicates success
2102 // flag == NE indicates failure
2103 }
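
// Recap of the fast-lock path above as illustrative pseudocode (not the emitted
// code itself, and eliding the biased-locking entry):
//   mark = obj->mark();
//   if (mark & monitor_value) goto inflated;                // existing monitor
//   box->displaced_header = mark | unlocked_value;
//   if (CAS(&obj->mark, mark | unlocked_value, box)) done;  // stack-lock it
//   if (mark is an address in our stack)                    // recursive case
//     box->displaced_header = 0;
// leaving flag EQ on success and NE on failure, as stated above.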
2104
2105 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
2106 Register temp, Register displaced_header, Register current_header) {
2107 assert_different_registers(oop, box, temp, displaced_header, current_header);
2108 assert(flag != CCR0, "bad condition register");
2109 Label cont;
2110 Label object_has_monitor;
2111
2112 // Always do locking in runtime.
2113 if (EmitSync & 0x01) {
2114 cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
2115 return;
2116 }
2117
2118 if (UseBiasedLocking) {
2119 biased_locking_exit(flag, oop, current_header, cont);
2120 }
2121
2122 // Find the lock address and load the displaced header from the stack.
2123 ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2124
2125 // If the displaced header is 0, we have a recursive unlock.
2126 cmpdi(flag, displaced_header, 0);
2127 beq(flag, cont);
2128
2129 // Handle existing monitor.
2130 if ((EmitSync & 0x02) == 0) {
2131 // The object has an existing monitor iff (mark & monitor_value) != 0.
2132 ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
2133 andi(temp, current_header, markOopDesc::monitor_value);
2134 cmpdi(flag, temp, 0);
2135 bne(flag, object_has_monitor);
2136 }
2137
2138
2139 // Check if it is still a lightweight lock, which is true if we see
2140 // the stack address of the basicLock in the markOop of the object.
2141 // Cmpxchg sets flag to cmpd(current_header, box).
2142 cmpxchgd(/*flag=*/flag,
2143 /*current_value=*/current_header,
2144 /*compare_value=*/box,
2145 /*exchange_value=*/displaced_header,
2146 /*where=*/oop,
2147 MacroAssembler::MemBarRel,
2148 MacroAssembler::cmpxchgx_hint_release_lock(),
2149 noreg,
2150 &cont);
2151
2152 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
2153
2154 // Handle existing monitor.
2155 if ((EmitSync & 0x02) == 0) {
2156 b(cont);
2157
2158 bind(object_has_monitor);
2159 addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
2160 ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
2161 ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
2162 xorr(temp, R16_thread, temp); // Will be 0 if we are the owner.
2163 orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
2164 cmpdi(flag, temp, 0);
2165 bne(flag, cont);
2166
2167 ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header);
2168 ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
2169 orr(temp, temp, displaced_header); // Will be 0 if both are 0.
2170 cmpdi(flag, temp, 0);
2171 bne(flag, cont);
2172 release();
2173 std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
2174 }
2175
2176 bind(cont);
2177 // flag == EQ indicates success
2178 // flag == NE indicates failure
2179 }
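
// Recap of the fast-unlock path above as illustrative pseudocode:
//   if (box->displaced_header == 0) done;                   // recursive unlock
//   if (obj->mark & monitor_value) goto inflated;
//   if (CAS(&obj->mark, box, displaced_header)) done;       // restore old mark
//   inflated:
//     if (owner == self && recursions == 0 && EntryList == cxq == NULL)
//       release-store owner = NULL;                         // hand off monitor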
2180
2424 // FIXME: assert that we really have a TOP_IJAVA_FRAME here!
2425 #ifdef CC_INTERP
2426 ld(tmp1/*pc*/, _top_ijava_frame_abi(frame_manager_lr), sp);
2427 #else
2428 address entry = pc();
2429 load_const_optimized(tmp1, entry);
2430 #endif
2431
2432 set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1);
2433 }
2434
2435 void MacroAssembler::get_vm_result(Register oop_result) {
2436 // Read:
2437 // R16_thread
2438 // R16_thread->in_bytes(JavaThread::vm_result_offset())
2439 //
2440 // Updated:
2441 // oop_result
2442 // R16_thread->in_bytes(JavaThread::vm_result_offset())
2443
2444 ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
2445 li(R0, 0);
2446 std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
2447
2448 verify_oop(oop_result);
2449 }
2450
2451 void MacroAssembler::get_vm_result_2(Register metadata_result) {
2452 // Read:
2453 // R16_thread
2454 // R16_thread->in_bytes(JavaThread::vm_result_2_offset())
2455 //
2456 // Updated:
2457 // metadata_result
2458 // R16_thread->in_bytes(JavaThread::vm_result_2_offset())
2459
2460 ld(metadata_result, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
2461 li(R0, 0);
2462 std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
2463 }
2464
2465
2466 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
2467 Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
2468 if (Universe::narrow_klass_base() != 0) {
2469 // Use dst as temp if it is free.
2470 load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg);
2471 sub(dst, current, R0);
2472 current = dst;
2473 }
2474 if (Universe::narrow_klass_shift() != 0) {
2475 srdi(dst, current, Universe::narrow_klass_shift());
2476 current = dst;
2477 }
2478 mr_if_needed(dst, current); // Move may be required.
2479 }
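
// In effect the encoding above computes
//   narrow_klass = ((uintptr_t)klass - narrow_klass_base()) >> narrow_klass_shift()
// skipping the subtraction when the base is 0 and the shift when it is 0.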
2480
2481 void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
2482 if (UseCompressedClassPointers) {
2483 encode_klass_not_null(ck, klass);
2484 stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop);
2485 } else {
2486 std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
2487 }
2488 }
2489
2490 void MacroAssembler::store_klass_gap(Register dst_oop, Register val) {
2491 if (UseCompressedClassPointers) {
2492 if (val == noreg) {
2493 val = R0;
2494 li(val, 0);
2495 }
2496 stw(val, oopDesc::klass_gap_offset_in_bytes(), dst_oop); // klass gap if compressed
2497 }
2498 }
2499
2500 int MacroAssembler::instr_size_for_decode_klass_not_null() {
2501 if (!UseCompressedClassPointers) return 0;
2502 int num_instrs = 1; // shift or move
2503 if (Universe::narrow_klass_base() != 0) num_instrs = 7; // shift(1) + load const(5) + add(1)
2504 return num_instrs * BytesPerInstWord;
2505 }
2506
2507 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
2508 assert(dst != R0, "Dst reg may not be R0, as R0 is used here.");
2509 if (src == noreg) src = dst;
2510 Register shifted_src = src;
2511 if (Universe::narrow_klass_shift() != 0 ||
2512     (Universe::narrow_klass_base() == 0 && src != dst)) { // Move required.
2513 shifted_src = dst;
2514 sldi(shifted_src, src, Universe::narrow_klass_shift());
2515 }
2516 if (Universe::narrow_klass_base() != 0) {
2517 load_const(R0, Universe::narrow_klass_base());
2518 add(dst, shifted_src, R0);
2519 }
2520 }
2521
2522 void MacroAssembler::load_klass(Register dst, Register src) {
2523 if (UseCompressedClassPointers) {
2524 lwz(dst, oopDesc::klass_offset_in_bytes(), src);
2525 // Attention: no null check here!
2526 decode_klass_not_null(dst, dst);
2527 } else {
2528 ld(dst, oopDesc::klass_offset_in_bytes(), src);
2529 }
2530 }
2531
2532 void MacroAssembler::load_klass_with_trap_null_check(Register dst, Register src) {
2533 if (!os::zero_page_read_protected()) {
2534 if (TrapBasedNullChecks) {
2535 trap_null_check(src);
2536 }
2537 }
2538 load_klass(dst, src);
2539 }
1 /*
2 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
3 * Copyright 2012, 2015 SAP AG. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
1438 }
1439
1440 // Performs atomic compare exchange:
1441 // if (compare_value == *addr_base)
1442 // *addr_base = exchange_value
1443 // int_flag_success = 1;
1444 // else
1445 // int_flag_success = 0;
1446 //
1447 // ConditionRegister flag = cmp(compare_value, *addr_base)
1448 // Register dest_current_value = *addr_base
1449 // Register compare_value Used to compare with value in memory
1450 // Register exchange_value Written to memory if compare_value == *addr_base
1451 // Register addr_base The memory location to compareXChange
1452 // Register int_flag_success Set to 1 if exchange_value was written to *addr_base
1453 //
1454 // To avoid the costly compare exchange, the value is tested beforehand.
1455 // Several special cases exist to avoid generating unnecessary code.
1456 //
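// In this version compare_value is a RegisterOrConstant, so a small immediate can
// be passed directly. Illustrative sketch (register and label names hypothetical),
// mirroring the monitor-owner CAS used below: swing a NULL owner to the current
// thread, branching to a failure label instead of materializing a result register:
//   cmpxchgd(flag, /*dest_current_value=*/R0, /*compare_value=*/(intptr_t)0,
//            /*exchange_value=*/R16_thread, /*addr_base=*/owner_addr,
//            MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
//            MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &failed);
//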
1457 void MacroAssembler::cmpxchgd(ConditionRegister flag,
1458 Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
1459 Register addr_base, int semantics, bool cmpxchgx_hint,
1460 Register int_flag_success, Label* failed_ext, bool contention_hint) {
1461 Label retry;
1462 Label failed_int;
1463 Label& failed = (failed_ext != NULL) ? *failed_ext : failed_int;
1464 Label done;
1465
1466 // Save one branch if result is returned via register and result register is different from the other ones.
1467 bool use_result_reg = (int_flag_success!=noreg);
1468 bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() &&
1469 int_flag_success!=exchange_value && int_flag_success!=addr_base);
1470 assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
1471
1472 // release/fence semantics
1473 if (semantics & MemBarRel) {
1474 release();
1475 }
1476
1477 if (use_result_reg && preset_result_reg) {
1478 li(int_flag_success, 0); // preset (assume cas failed)
1479 }
1480
1481 // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
1482 if (contention_hint) { // Don't try to reserve if cmp fails.
1483 ld(dest_current_value, 0, addr_base);
1484 cmpd(flag, compare_value, dest_current_value);
1485 bne(flag, failed);
1486 }
1487
1488 // atomic emulation loop
1489 bind(retry);
1490
1491 ldarx(dest_current_value, addr_base, cmpxchgx_hint);
1492 cmpd(flag, compare_value, dest_current_value);
1493 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1494 bne_predict_not_taken(flag, failed);
1495 } else {
1496 bne( flag, failed);
1497 }
1498
1499 stdcx_(exchange_value, addr_base);
1500 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1501 bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
1502 } else {
1503 bne( CCR0, retry); // stXcx_ sets CCR0
1504 }
1505
1506 // result in register (must do this at the end because int_flag_success can be the same register as one above)
1507 if (use_result_reg) {
1508 li(int_flag_success, 1);
1509 }
1510
1511 // POWER6 doesn't need isync in CAS, but we always emit isync
1512 // to be on the safe side.
1856 // rotate epoch bits to right (little) end and set other bits to 0
1857 // [ big part | epoch | little part ] -> [ 0..0 | epoch ]
1858 rldicl_(temp2_reg, temp_reg, shift_amount, 64 - markOopDesc::epoch_bits);
1859 // branch if epoch bits are != 0, i.e. they differ, because the epoch has been incremented
1860 bne(CCR0, try_rebias);
1861
1862 // The epoch of the current bias is still valid but we know nothing
1863 // about the owner; it might be set or it might be clear. Try to
1864 // acquire the bias of the object using an atomic operation. If this
1865 // fails we will go in to the runtime to revoke the object's bias.
1866 // Note that we first construct the presumed unbiased header so we
1867 // don't accidentally blow away another thread's valid bias.
1868 andi(mark_reg, mark_reg, (markOopDesc::biased_lock_mask_in_place |
1869 markOopDesc::age_mask_in_place |
1870 markOopDesc::epoch_mask_in_place));
1871 orr(temp_reg, R16_thread, mark_reg);
1872
1873 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1874
1875 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
1876 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1877 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1878 /*where=*/obj_reg,
1879 MacroAssembler::MemBarAcq,
1880 MacroAssembler::cmpxchgx_hint_acquire_lock(),
1881 noreg, slow_case_int); // bail out if failed
1882
1883 // If the biasing toward our thread failed, this means that
1884 // another thread succeeded in biasing it toward itself and we
1885 // need to revoke that bias. The revocation will occur in the
1886 // interpreter runtime in the slow case.
1887 if (PrintBiasedLockingStatistics) {
1888 load_const(temp_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp2_reg);
1889 lwz(temp2_reg, 0, temp_reg);
1890 addi(temp2_reg, temp2_reg, 1);
1891 stw(temp2_reg, 0, temp_reg);
1892 }
1893 b(done);
1894
1895 bind(try_rebias);
1896 // At this point we know the epoch has expired, meaning that the
1897 // current "bias owner", if any, is actually invalid. Under these
1898 // circumstances _only_, we are allowed to use the current header's
1899 // value as the comparison value when doing the cas to acquire the
1900 // bias in the current epoch. In other words, we allow transfer of
1901 // the bias from one thread to another directly in this situation.
1902 andi(temp_reg, mark_reg, markOopDesc::age_mask_in_place);
1903 orr(temp_reg, R16_thread, temp_reg);
1904 load_klass(temp2_reg, obj_reg);
1905 ld(temp2_reg, in_bytes(Klass::prototype_header_offset()), temp2_reg);
1906 orr(temp_reg, temp_reg, temp2_reg);
1907
1908 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1909
1910 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
1911 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1912 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1913 /*where=*/obj_reg,
1914 MacroAssembler::MemBarAcq,
1915 MacroAssembler::cmpxchgx_hint_acquire_lock(),
1916 noreg, slow_case_int); // bail out if failed
1917
1918 // If the biasing toward our thread failed, this means that
1919 // another thread succeeded in biasing it toward itself and we
1920 // need to revoke that bias. The revocation will occur in the
1921 // interpreter runtime in the slow case.
1922 if (PrintBiasedLockingStatistics) {
1923 load_const(temp_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp2_reg);
1924 lwz(temp2_reg, 0, temp_reg);
1925 addi(temp2_reg, temp2_reg, 1);
1926 stw(temp2_reg, 0, temp_reg);
1927 }
1928 b(done);
1929
1930 bind(try_revoke_bias);
1931 // The prototype mark in the klass doesn't have the bias bit set any
1932 // more, indicating that objects of this data type are not supposed
1933 // to be biased any more. We are going to try to reset the mark of
1934 // this object to the prototype value and fall through to the
1935 // CAS-based locking scheme. Note that if our CAS fails, it means
1936 // that another thread raced us for the privilege of revoking the
1937 // bias of this particular object, so it's okay to continue in the
1938 // normal locking code.
1939 load_klass(temp_reg, obj_reg);
1940 ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg);
1941 andi(temp2_reg, mark_reg, markOopDesc::age_mask_in_place);
1942 orr(temp_reg, temp_reg, temp2_reg);
1943
1944 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
1945
1946 // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
1947 cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
1948 /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
1949 /*where=*/obj_reg,
1950 MacroAssembler::MemBarAcq,
1951 MacroAssembler::cmpxchgx_hint_acquire_lock());
1952
1953 // reload markOop in mark_reg before continuing with lightweight locking
1954 ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
1955
1956 // Fall through to the normal CAS-based lock, because no matter what
1957 // the result of the above CAS, some thread must have succeeded in
1958 // removing the bias bit from the object's header.
1959 if (PrintBiasedLockingStatistics) {
1960 Label l;
1961 bne(cr_reg, l);
1962 load_const(temp_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp2_reg);
1963 lwz(temp2_reg, 0, temp_reg);
1964 addi(temp2_reg, temp2_reg, 1);
1965 stw(temp2_reg, 0, temp_reg);
1966 bind(l);
1967 }
1968
1969 bind(cas_label);
1970 }
1971
1972 void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done) {
1973 // Check for biased locking unlock case, which is a no-op
1974 // Note: we do not have to check the thread ID for two reasons.
1975 // First, the interpreter checks for IllegalMonitorStateException at
1976 // a higher level. Second, if the bias was revoked while we held the
1977 // lock, the object could not be rebiased toward another thread, so
1978 // the bias bit would be clear.
1979
1980 ld(temp_reg, 0, mark_addr);
1981 andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
1982
1983 cmpwi(cr_reg, temp_reg, markOopDesc::biased_lock_pattern);
1984 beq(cr_reg, done);
1985 }
1986
1987 // TM on PPC64.
1988 void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
1989 Label retry;
1990 bind(retry);
1991 ldarx(result, addr, /*hint*/ false);
1992 addi(result, result, simm16);
1993 stdcx_(result, addr);
1994 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
1995 bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
1996 } else {
1997 bne( CCR0, retry); // stXcx_ sets CCR0
1998 }
1999 }
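
// The ldarx/stdcx_ loop above behaves like (illustrative C, not emitted code):
//   do { result = *addr; result += simm16; } while (!store_conditional(addr, result));
// stdcx_ sets CCR0 EQ iff the reservation from ldarx still held, so the bne on
// CCR0 retries whenever another store to the reservation granule intervened.
// atomic_ori_int below follows the same pattern with lwarx/stwcx_ and ori.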
2000
2001 void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) {
2002 Label retry;
2003 bind(retry);
2004 lwarx(result, addr, /*hint*/ false);
2005 ori(result, result, uimm16);
2006 stwcx_(result, addr);
2007 if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
2008 bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
2009 } else {
2010 bne( CCR0, retry); // stXcx_ sets CCR0
2011 }
2012 }
2013
2014 #if INCLUDE_RTM_OPT
2015
2016 // Update rtm_counters based on abort status
2017 // input: abort_status
2018 // rtm_counters (RTMLockingCounters*)
2019 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
2020 // Mapping to keep PreciseRTMLockingStatistics similar to x86.
2021 // x86 ppc (! means inverted, ? means not the same)
2022 // 0 31 Set if abort caused by XABORT instruction.
2023 // 1 ! 7 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
2024 // 2 13 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
2025 // 3 10 Set if an internal buffer overflowed.
2026 // 4 ?12 Set if a debug breakpoint was hit.
2027 // 5 ?32 Set if an abort occurred during execution of a nested transaction.
2028 const int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too.
2029 Assembler::tm_failure_persistent, // inverted: transient
2030 Assembler::tm_trans_cf,
2031 Assembler::tm_footprint_of,
2032 Assembler::tm_non_trans_cf,
2033 Assembler::tm_suspended};
2034 const bool tm_failure_inv[] = {false, true, false, false, false, false};
2035 assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!");
2036
2037 const Register addr_Reg = R0;
2038 // Keep track of offset to where rtm_counters_Reg had pointed to.
2039 int counters_offs = RTMLockingCounters::abort_count_offset();
2040 addi(addr_Reg, rtm_counters_Reg, counters_offs);
2041 const Register temp_Reg = rtm_counters_Reg;
2042
2043 //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
2044 ldx(temp_Reg, addr_Reg);
2045 addi(temp_Reg, temp_Reg, 1);
2046 stdx(temp_Reg, addr_Reg);
2047
2048 if (PrintPreciseRTMLockingStatistics) {
2049 int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs;
2050
2051 //mftexasr(abort_status); done by caller
2052 for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
2053 counters_offs += counters_offs_delta;
2054 li(temp_Reg, counters_offs_delta); // can't use addi with R0
2055 add(addr_Reg, addr_Reg, temp_Reg); // point to next counter
2056 counters_offs_delta = sizeof(uintx);
2057
2058 Label check_abort;
2059 rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0);
2060 if (tm_failure_inv[i]) {
2061 bne(CCR0, check_abort);
2062 } else {
2063 beq(CCR0, check_abort);
2064 }
2065 //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
2066 ldx(temp_Reg, addr_Reg);
2067 addi(temp_Reg, temp_Reg, 1);
2068 stdx(temp_Reg, addr_Reg);
2069 bind(check_abort);
2070 }
2071 }
2072 li(temp_Reg, -counters_offs); // can't use addi with R0
2073 add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore
2074 }
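
// Note on the bit test above: PPC numbers bits from the MSB (bit 0), so
// rldicr_(t, s, n, 0) rotates bit n of s into bit position 0 and clears all
// other bits; CR0 then tells whether the selected texasr failure bit was set.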
2075
2076 // Branch if (random & (count-1) != 0), count is 2^n
2077 // tmp and CR0 are killed
2078 void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
2079 mftb(tmp);
2080 andi_(tmp, tmp, count-1);
2081 bne(CCR0, brLabel);
2082 }
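
// Example: with count == 64 the low 6 bits of the time base are tested and the
// branch is taken roughly 63 times out of 64, so the time base acts as a cheap
// pseudo-random source for sampling about 1/count of the events.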
2083
2084 // Perform abort ratio calculation, set no_rtm bit if high ratio.
2085 // input: rtm_counters_Reg (RTMLockingCounters* address) - KILLED
2086 void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
2087 RTMLockingCounters* rtm_counters,
2088 Metadata* method_data) {
2089 Label L_done, L_check_always_rtm1, L_check_always_rtm2;
2090
2091 if (RTMLockingCalculationDelay > 0) {
2092 // Delay calculation.
2093 ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr());
2094 cmpdi(CCR0, rtm_counters_Reg, 0);
2095 beq(CCR0, L_done);
2096 load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
2097 }
2098 // Abort ratio calculation only if abort_count > RTMAbortThreshold.
2099 // Aborted transactions = abort_count * 100
2100 // All transactions = total_count * RTMTotalCountIncrRate
2101 // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
2102 ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
2103 cmpdi(CCR0, R0, RTMAbortThreshold);
2104 blt(CCR0, L_check_always_rtm2);
2105 mulli(R0, R0, 100);
2106
2107 const Register tmpReg = rtm_counters_Reg;
2108 ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
2109 mulli(tmpReg, tmpReg, RTMTotalCountIncrRate);
2110 mulli(tmpReg, tmpReg, RTMAbortRatio);
2111 cmpd(CCR0, R0, tmpReg);
2112 blt(CCR0, L_check_always_rtm1); // jump to reload
2113 if (method_data != NULL) {
2114 // Set rtm_state to "no rtm" in MDO.
2115 // Not using a metadata relocation. Method and Class Loader are kept alive anyway.
2116 // (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
2117 load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
2118 atomic_ori_int(R0, tmpReg, NoRTM);
2119 }
2120 b(L_done);
2121
2122 bind(L_check_always_rtm1);
2123 load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
2124 bind(L_check_always_rtm2);
2125 ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
2126 cmpdi(CCR0, tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
2127 blt(CCR0, L_done);
2128 if (method_data != NULL) {
2129 // Set rtm_state to "always rtm" in MDO.
2130 // Not using a metadata relocation. See above.
2131 load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
2132 atomic_ori_int(R0, tmpReg, UseRTM);
2133 }
2134 bind(L_done);
2135 }
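
// Worked example of the ratio test above, assuming (hypothetically) the flag
// values RTMAbortRatio=50 and RTMTotalCountIncrRate=64: with abort_count=2000
// and total_count=100 the check is
//   2000 * 100 >= 100 * 64 * 50   i.e.   200000 >= 320000,
// which is false, so RTM stays enabled; only when aborts dominate the sampled
// totals is the no_rtm bit set in the MDO.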
2136
2137 // Update counters and perform abort ratio calculation.
2138 // input: abort_status_Reg
2139 void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
2140 RTMLockingCounters* rtm_counters,
2141 Metadata* method_data,
2142 bool profile_rtm) {
2143
2144 assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
2145 // Update rtm counters based on state at abort.
2146 // Reads abort_status_Reg, updates flags.
2147 assert_different_registers(abort_status_Reg, temp_Reg);
2148 load_const_optimized(temp_Reg, (address)rtm_counters, R0);
2149 rtm_counters_update(abort_status_Reg, temp_Reg);
2150 if (profile_rtm) {
2151 assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
2152 rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
2153 }
2154 }
2155
2156 // Retry on abort if abort's status indicates non-persistent failure.
2157 // inputs: retry_count_Reg
2158 // : abort_status_Reg
2159 // output: retry_count_Reg decremented by 1
2160 void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
2161 Label& retryLabel, Label* checkRetry) {
2162 Label doneRetry;
2163 rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0);
2164 bne(CCR0, doneRetry);
2165 if (checkRetry) { bind(*checkRetry); }
2166 addic_(retry_count_Reg, retry_count_Reg, -1);
2167 blt(CCR0, doneRetry);
2168 smt_yield(); // Can't use wait(). No permission (SIGILL).
2169 b(retryLabel);
2170 bind(doneRetry);
2171 }
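
// In effect (illustrative pseudocode):
//   if (texasr & failure_persistent) goto done;   // retrying won't help
//   if (--retry_count < 0) goto done;
//   yield(); goto retryLabel;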
2172
2173 // Spin and retry if lock is busy.
2174 // inputs: box_Reg (monitor address)
2175 // : retry_count_Reg
2176 // output: retry_count_Reg decremented by 1
2177 // CTR is killed
2178 void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
2179 Label SpinLoop, doneRetry;
2180 addic_(retry_count_Reg, retry_count_Reg, -1);
2181 blt(CCR0, doneRetry);
2182 li(R0, RTMSpinLoopCount);
2183 mtctr(R0);
2184
2185 bind(SpinLoop);
2186 smt_yield(); // Can't use waitrsv(). No permission (SIGILL).
2187 bdz(retryLabel);
2188 ld(R0, 0, owner_addr_Reg);
2189 cmpdi(CCR0, R0, 0);
2190 bne(CCR0, SpinLoop);
2191 b(retryLabel);
2192
2193 bind(doneRetry);
2194 }
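
// In effect (illustrative pseudocode):
//   if (--retry_count < 0) goto done;
//   for (i = RTMSpinLoopCount; i > 0; i--) {
//     yield();
//     if (*owner_addr == 0) break;   // lock looks free again
//   }
//   goto retryLabel;                 // retry whether or not it freed up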
2195
2196 // Use RTM for normal stack locks.
2197 // Input: objReg (object to lock)
2198 void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
2199 Register obj, Register mark_word, Register tmp,
2200 Register retry_on_abort_count_Reg,
2201 RTMLockingCounters* stack_rtm_counters,
2202 Metadata* method_data, bool profile_rtm,
2203 Label& DONE_LABEL, Label& IsInflated) {
2204 assert(UseRTMForStackLocks, "why call this otherwise?");
2205 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
2206 Label L_rtm_retry, L_decrement_retry, L_on_abort;
2207
2208 if (RTMRetryCount > 0) {
2209 load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
2210 bind(L_rtm_retry);
2211 }
2212 andi_(R0, mark_word, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
2213 bne(CCR0, IsInflated);
2214
2215 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2216 Label L_noincrement;
2217 if (RTMTotalCountIncrRate > 1) {
2218 branch_on_random_using_tb(tmp, (int)RTMTotalCountIncrRate, L_noincrement);
2219 }
2220 assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
2221 load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
2222 //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
2223 ldx(mark_word, tmp);
2224 addi(mark_word, mark_word, 1);
2225 stdx(mark_word, tmp);
2226 bind(L_noincrement);
2227 }
2228 tbegin_();
2229 beq(CCR0, L_on_abort);
2230 ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked.
2231 andi(R0, mark_word, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
2232 cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked
2233 beq(flag, DONE_LABEL); // all done if unlocked
2234
2235 if (UseRTMXendForLockBusy) {
2236 tend_();
2237 b(L_decrement_retry);
2238 } else {
2239 tabort_();
2240 }
2241 bind(L_on_abort);
2242 const Register abort_status_Reg = tmp;
2243 mftexasr(abort_status_Reg);
2244 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2245 rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
2246 }
2247 ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
2248 if (RTMRetryCount > 0) {
2249 // Retry on lock abort if abort status is not permanent.
2250 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
2251 } else {
2252 bind(L_decrement_retry);
2253 }
2254 }
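
// Overall shape of the stack-lock elision above (illustrative pseudocode):
//   if (mark & monitor_value) goto IsInflated;
//   tbegin();
//   if (transaction failed to start) goto on_abort;
//   if ((obj->mark & lock_bits) == unlocked_value) goto DONE;  // lock elided
//   tend() or tabort();                      // not unlocked: back out
//   on_abort: profile, maybe retry, else fall through to regular locking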
2255
2256 // Use RTM for inflating locks
2257 // inputs: obj (object to lock)
2258 // mark_word (current header - KILLED)
2259 // boxReg (on-stack box address (displaced header location) - KILLED)
2260 void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
2261 Register obj, Register mark_word, Register boxReg,
2262 Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
2263 RTMLockingCounters* rtm_counters,
2264 Metadata* method_data, bool profile_rtm,
2265 Label& DONE_LABEL) {
2266 assert(UseRTMLocking, "why call this otherwise?");
2267 Label L_rtm_retry, L_decrement_retry, L_on_abort;
2268 // Clean monitor_value bit to get valid pointer.
2269 int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
2270
2271 // Store non-null, using boxReg instead of (intptr_t)markOopDesc::unused_mark().
2272 std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
2273 const Register tmpReg = boxReg;
2274 const Register owner_addr_Reg = mark_word;
2275 addi(owner_addr_Reg, mark_word, owner_offset);
2276
2277 if (RTMRetryCount > 0) {
2278 load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy.
2279 load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
2280 bind(L_rtm_retry);
2281 }
2282 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2283 Label L_noincrement;
2284 if (RTMTotalCountIncrRate > 1) {
2285 branch_on_random_using_tb(R0, (int)RTMTotalCountIncrRate, L_noincrement);
2286 }
2287 assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
2288 load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
2289 //atomic_inc_ptr(R0, tmpReg); We don't increment atomically
2290 ldx(tmpReg, R0);
2291 addi(tmpReg, tmpReg, 1);
2292 stdx(tmpReg, R0);
2293 bind(L_noincrement);
2294 }
2295 tbegin_();
2296 beq(CCR0, L_on_abort);
2297 // We don't reload mark word. Will only be reset at safepoint.
2298 ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
2299 cmpdi(flag, R0, 0);
2300 beq(flag, DONE_LABEL);
2301
2302 if (UseRTMXendForLockBusy) {
2303 tend_();
2304 b(L_decrement_retry);
2305 } else {
2306 tabort_();
2307 }
2308 bind(L_on_abort);
2309 const Register abort_status_Reg = tmpReg;
2310 mftexasr(abort_status_Reg);
2311 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
2312 rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
2313 // Restore owner_addr_Reg
2314 ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
2315 #ifdef ASSERT
2316 andi_(R0, mark_word, markOopDesc::monitor_value);
2317 asm_assert_ne("must be inflated", 0xa754); // Deflating only allowed at safepoint.
2318 #endif
2319 addi(owner_addr_Reg, mark_word, owner_offset);
2320 }
2321 if (RTMRetryCount > 0) {
2322 // Retry on lock abort if abort status is not permanent.
2323 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
2324 }
2325
2326 // Appears unlocked - try to swing _owner from null to non-null.
2327 cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
2328 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2329 MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);
2330
2331 if (RTMRetryCount > 0) {
2332 // Success: done; else retry.
2333 b(DONE_LABEL);
2334 bind(L_decrement_retry);
2335 // Spin and retry if lock is busy.
2336 rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
2337 } else {
2338 bind(L_decrement_retry);
2339 }
2340 }
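
// Overall shape of the inflated-lock elision above (illustrative pseudocode):
//   tbegin();
//   if (transaction failed to start) goto on_abort;
//   if (monitor->owner == NULL) goto DONE;   // elided; owner stays in the read set
//   tend() or tabort();
//   on_abort: profile, maybe retry, else CAS owner NULL -> thread as fallback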
2341
2342 #endif // INCLUDE_RTM_OPT
2343
2344 // "The box" is the space on the stack where we copy the object mark.
2345 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
2346 Register temp, Register displaced_header, Register current_header,
2347 bool try_bias,
2348 RTMLockingCounters* rtm_counters,
2349 RTMLockingCounters* stack_rtm_counters,
2350 Metadata* method_data,
2351 bool use_rtm, bool profile_rtm) {
2352 assert_different_registers(oop, box, temp, displaced_header, current_header);
2353 assert(flag != CCR0, "bad condition register");
2354 Label cont;
2355 Label object_has_monitor;
2356 Label cas_failed;
2357
2358 // Load markOop from object into displaced_header.
2359 ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
2360
2361
2362 // Always do locking in runtime.
2363 if (EmitSync & 0x01) {
2364 cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
2365 return;
2366 }
2367
2368 if (try_bias) {
2369 biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
2370 }
2371
2372 #if INCLUDE_RTM_OPT
2373 if (UseRTMForStackLocks && use_rtm) {
2374 rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
2375 stack_rtm_counters, method_data, profile_rtm,
2376 cont, object_has_monitor);
2377 }
2378 #endif // INCLUDE_RTM_OPT
2379
2380 // Handle existing monitor.
2381 if ((EmitSync & 0x02) == 0) {
2382 // The object has an existing monitor iff (mark & monitor_value) != 0.
2383 andi_(temp, displaced_header, markOopDesc::monitor_value);
2384 bne(CCR0, object_has_monitor);
2385 }
2386
2387 // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
2388 ori(displaced_header, displaced_header, markOopDesc::unlocked_value);
2389
2390 // Load Compare Value application register.
2391
2392 // Initialize the box. (Must happen before we update the object mark!)
2393 std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2394
2395 // Must fence, otherwise, preceding store(s) may float below cmpxchg.
2396 // Compare object markOop with mark and if equal exchange scratch1 with object markOop.
2397 // CmpxchgX sets cr_reg to cmpX(current, displaced).
2398 membar(Assembler::StoreStore);
2399 cmpxchgd(/*flag=*/flag,
2416
2417 // Check if the owner is self by comparing the value in the markOop of object
2418 // (current_header) with the stack pointer.
2419 sub(current_header, current_header, R1_SP);
2420 load_const_optimized(temp, (address) (~(os::vm_page_size()-1) |
2421 markOopDesc::lock_mask_in_place));
2422
2423 and_(R0/*==0?*/, current_header, temp);
2424 // If the condition is true this is a recursive lock and we can store 0 as
2425 // the displaced header in the box, which indicates that it is a recursive lock.
2426 mcrf(flag,CCR0);
2427 std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
2428
2429 // Handle existing monitor.
2430 if ((EmitSync & 0x02) == 0) {
2431 b(cont);
2432
2433 bind(object_has_monitor);
2434 // The object's monitor m is unlocked iff m->owner == NULL,
2435 // otherwise m->owner may contain a thread or a stack address.
2436
2437 #if INCLUDE_RTM_OPT
2438 // Use the same RTM locking code in 32- and 64-bit VM.
2439 if (use_rtm) {
2440 rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
2441 rtm_counters, method_data, profile_rtm, cont);
2442 } else {
2443 #endif // INCLUDE_RTM_OPT
2444
2445 // Try to CAS m->owner from NULL to current thread.
2446 addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
2447 li(displaced_header, 0);
2448 // CmpxchgX sets flag to cmpX(current, displaced).
2449 cmpxchgd(/*flag=*/flag,
2450 /*current_value=*/current_header,
2451 /*compare_value=*/(intptr_t)0,
2452 /*exchange_value=*/R16_thread,
2453 /*where=*/temp,
2454 MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
2455 MacroAssembler::cmpxchgx_hint_acquire_lock());
2456
2457 // Store a non-null value into the box.
2458 std(box, BasicLock::displaced_header_offset_in_bytes(), box);
2459
2460 # ifdef ASSERT
2461 bne(flag, cont);
2462 // We have acquired the monitor, check some invariants.
2463 addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
2464 // Invariant 1: _recursions should be 0.
2465 //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
2466 asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
2467 "monitor->_recursions should be 0", -1);
2468 // Invariant 2: OwnerIsThread shouldn't be 0.
2469 //assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
2470 //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
2471 // "monitor->OwnerIsThread shouldn't be 0", -1);
2472 # endif
2473
2474 #if INCLUDE_RTM_OPT
2475 } // use_rtm()
2476 #endif
2477 }
2478
2479 bind(cont);
2480 // flag == EQ indicates success
2481 // flag == NE indicates failure
2482 }
2483
2484 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
2485 Register temp, Register displaced_header, Register current_header,
2486 bool try_bias, bool use_rtm) {
2487 assert_different_registers(oop, box, temp, displaced_header, current_header);
2488 assert(flag != CCR0, "bad condition register");
2489 Label cont;
2490 Label object_has_monitor;
2491
2492 // Always do locking in runtime.
2493 if (EmitSync & 0x01) {
2494 cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
2495 return;
2496 }
2497
2498 if (try_bias) {
2499 biased_locking_exit(flag, oop, current_header, cont);
2500 }
2501
2502 #if INCLUDE_RTM_OPT
2503 if (UseRTMForStackLocks && use_rtm) {
2504 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
2505 Label L_regular_unlock;
2506 ld(current_header, oopDesc::mark_offset_in_bytes(), oop); // fetch markword
2507 andi(R0, current_header, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
2508 cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked
2509 bne(flag, L_regular_unlock); // else RegularLock
2510 tend_(); // otherwise end...
2511 b(cont); // ... and we're done
2512 bind(L_regular_unlock);
2513 }
2514 #endif
2515
2516 // Find the lock address and load the displaced header from the stack.
2517 ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
2518
2519 // If the displaced header is 0, we have a recursive unlock.
2520 cmpdi(flag, displaced_header, 0);
2521 beq(flag, cont);
2522
2523 // Handle existing monitor.
2524 if ((EmitSync & 0x02) == 0) {
2525 // The object has an existing monitor iff (mark & monitor_value) != 0.
2526 RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
2527 ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
2528 andi_(R0, current_header, markOopDesc::monitor_value);
2529 bne(CCR0, object_has_monitor);
2530 }
2531
2532 // Check if it is still a lightweight lock, which is true if we see
2533 // the stack address of the basicLock in the markOop of the object.
2534 // Cmpxchg sets flag to cmpd(current_header, box).
2535 cmpxchgd(/*flag=*/flag,
2536 /*current_value=*/current_header,
2537 /*compare_value=*/box,
2538 /*exchange_value=*/displaced_header,
2539 /*where=*/oop,
2540 MacroAssembler::MemBarRel,
2541 MacroAssembler::cmpxchgx_hint_release_lock(),
2542 noreg,
2543 &cont);
2544
2545 assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
2546
2547 // Handle existing monitor.
2548 if ((EmitSync & 0x02) == 0) {
2549 b(cont);
2550
2551 bind(object_has_monitor);
2552 addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
2553 ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
2554
2555 // It's inflated.
2556 #if INCLUDE_RTM_OPT
2557 if (use_rtm) {
2558 Label L_regular_inflated_unlock;
2559 // Clean monitor_value bit to get valid pointer
2560 cmpdi(flag, temp, 0);
2561 bne(flag, L_regular_inflated_unlock);
2562 tend_();
2563 b(cont);
2564 bind(L_regular_inflated_unlock);
2565 }
2566 #endif
2567
2568 ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
2569 xorr(temp, R16_thread, temp); // Will be 0 if we are the owner.
2570 orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
2571 cmpdi(flag, temp, 0);
2572 bne(flag, cont);
2573
2574 ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header);
2575 ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
2576 orr(temp, temp, displaced_header); // Will be 0 if both are 0.
2577 cmpdi(flag, temp, 0);
2578 bne(flag, cont);
2579 release();
2580 std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
2581 }
2582
2583 bind(cont);
2584 // flag == EQ indicates success
2585 // flag == NE indicates failure
2586 }
2587
2831 // FIXME: assert that we really have a TOP_IJAVA_FRAME here!
2832 #ifdef CC_INTERP
2833 ld(tmp1/*pc*/, _top_ijava_frame_abi(frame_manager_lr), sp);
2834 #else
2835 address entry = pc();
2836 load_const_optimized(tmp1, entry);
2837 #endif
2838
2839 set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1);
2840 }
2841
2842 void MacroAssembler::get_vm_result(Register oop_result) {
2843 // Read:
2844 // R16_thread
2845 // R16_thread->in_bytes(JavaThread::vm_result_offset())
2846 //
2847 // Updated:
2848 // oop_result
2849 // R16_thread->in_bytes(JavaThread::vm_result_offset())
2850
2851 verify_thread();
2852
2853 ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
2854 li(R0, 0);
2855 std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
2856
2857 verify_oop(oop_result);
2858 }
2859
2860 void MacroAssembler::get_vm_result_2(Register metadata_result) {
2861 // Read:
2862 // R16_thread
2863 // R16_thread->in_bytes(JavaThread::vm_result_2_offset())
2864 //
2865 // Updated:
2866 // metadata_result
2867 // R16_thread->in_bytes(JavaThread::vm_result_2_offset())
2868
2869 ld(metadata_result, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
2870 li(R0, 0);
2871 std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
2872 }
2873
2874 Register MacroAssembler::encode_klass_not_null(Register dst, Register src) {
2875 Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
2876 if (Universe::narrow_klass_base() != 0) {
2877 // Use dst as temp if it is free.
2878 sub_const_optimized(dst, current, Universe::narrow_klass_base(), R0);
2879 current = dst;
2880 }
2881 if (Universe::narrow_klass_shift() != 0) {
2882 srdi(dst, current, Universe::narrow_klass_shift());
2883 current = dst;
2884 }
2885 return current;
2886 }
2887
2888 void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
2889 if (UseCompressedClassPointers) {
2890 Register compressedKlass = encode_klass_not_null(ck, klass);
2891 stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop);
2892 } else {
2893 std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
2894 }
2895 }
2896
2897 void MacroAssembler::store_klass_gap(Register dst_oop, Register val) {
2898 if (UseCompressedClassPointers) {
2899 if (val == noreg) {
2900 val = R0;
2901 li(val, 0);
2902 }
2903 stw(val, oopDesc::klass_gap_offset_in_bytes(), dst_oop); // klass gap if compressed
2904 }
2905 }
2906
2907 int MacroAssembler::instr_size_for_decode_klass_not_null() {
2908 if (!UseCompressedClassPointers) return 0;
2909 int num_instrs = 1; // shift or move
2910 if (Universe::narrow_klass_base() != 0) num_instrs = 7; // shift(1) + load const(5) + add(1)
2911 return num_instrs * BytesPerInstWord;
2912 }
2913
2914 void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
2915 assert(dst != R0, "Dst reg may not be R0, as R0 is used here.");
2916 if (src == noreg) src = dst;
2917 Register shifted_src = src;
2918 if (Universe::narrow_klass_shift() != 0 ||
2919     (Universe::narrow_klass_base() == 0 && src != dst)) { // Move required.
2920 shifted_src = dst;
2921 sldi(shifted_src, src, Universe::narrow_klass_shift());
2922 }
2923 if (Universe::narrow_klass_base() != 0) {
2924 add_const_optimized(dst, shifted_src, Universe::narrow_klass_base(), R0);
2925 }
2926 }
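
// In effect the decoding above computes the inverse of encode_klass_not_null:
//   klass = ((uintptr_t)narrow_klass << narrow_klass_shift()) + narrow_klass_base()
// again skipping whichever of the shift and the base-add is unnecessary.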
2927
2928 void MacroAssembler::load_klass(Register dst, Register src) {
2929 if (UseCompressedClassPointers) {
2930 lwz(dst, oopDesc::klass_offset_in_bytes(), src);
2931 // Attention: no null check here!
2932 decode_klass_not_null(dst, dst);
2933 } else {
2934 ld(dst, oopDesc::klass_offset_in_bytes(), src);
2935 }
2936 }
2937
2938 void MacroAssembler::load_klass_with_trap_null_check(Register dst, Register src) {
2939 if (!os::zero_page_read_protected()) {
2940 if (TrapBasedNullChecks) {
2941 trap_null_check(src);
2942 }
2943 }
2944 load_klass(dst, src);
2945 }