# HG changeset patch # User gromero # Date 1530908707 14400 # Fri Jul 06 16:25:07 2018 -0400 # Node ID 6d868b6e05b3a157f8ad016ed0c040b9c0208721 # Parent e0028bb6dd3df751bede76da156ebfcb0f487c60 8205582: PPC64: RTM: Fix counter for aborts on nested transactions diff --git a/src/hotspot/cpu/ppc/assembler_ppc.hpp b/src/hotspot/cpu/ppc/assembler_ppc.hpp --- a/src/hotspot/cpu/ppc/assembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp @@ -1635,6 +1635,7 @@ // TEXASR bit description enum transaction_failure_reason { // Upper half (TEXASRU): + tm_failure_code = 0, // The Failure Code is copied from tabort or treclaim operand. tm_failure_persistent = 7, // The failure is likely to recur on each execution. tm_disallowed = 8, // The instruction is not permitted. tm_nesting_of = 9, // The maximum transaction level was exceeded. @@ -1650,6 +1651,7 @@ tm_failure_summary = 36, // Failure has been detected and recorded. tm_tfiar_exact = 37, // Value in the TFIAR is exact. tm_rot = 38, // Rollback-only transaction. + tm_transaction_level = 52, // Transaction level (nesting depth + 1). }; // PPC 1, section 2.4.1 Branch Instructions diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -2412,7 +2412,7 @@ // Update rtm_counters based on abort status // input: abort_status -// rtm_counters (RTMLockingCounters*) +// rtm_counters_Reg (RTMLockingCounters*) void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) { // Mapping to keep PreciseRTMLockingStatistics similar to x86. // x86 ppc (! means inverted, ? means not the same) @@ -2422,52 +2422,113 @@ // 3 10 Set if an internal buffer overflowed. // 4 ?12 Set if a debug breakpoint was hit. // 5 ?32 Set if an abort occurred during execution of a nested transaction. 
- const int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too. - Assembler::tm_failure_persistent, // inverted: transient - Assembler::tm_trans_cf, - Assembler::tm_footprint_of, - Assembler::tm_non_trans_cf, - Assembler::tm_suspended}; - const bool tm_failure_inv[] = {false, true, false, false, false, false}; - assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!"); - - const Register addr_Reg = R0; - // Keep track of offset to where rtm_counters_Reg had pointed to. + const int failure_bit[] = {tm_tabort, // Signal handler will set this too. + tm_failure_persistent, // Inverted: transient. + tm_non_trans_cf, + tm_trans_cf, + tm_footprint_of, + tm_failure_code, + tm_transaction_level}; + + const bool failure_logic_inv[] = {false, // tabort + true, // failure_persistent + false, // non_trans_cf + false, // trans_cf + false, // footprint_of + true, // failure_code + false}; // transaction_level + + const int num_failure_bits = sizeof(failure_bit) / sizeof(int); + const int num_counters = RTMLockingCounters::ABORT_STATUS_LIMIT; + + bool bit2counter_map[][num_counters] = + // counters: + // 0 1 2 3 4 5 + // abort , persist, conflict, overflow, debug , nested bits: + {{ true , false , false , false , false , false }, // abort + { false , true , false , false , false , false }, // failure_persistent + { false , false , true , false , false , false }, // non_trans_cf + { false , false , true , false , false , false }, // trans_cf + { false , false , false , true , false , false }, // footprint_of + { false , false , false , false , true , false }, // failure_code = 0xD4 + { false , false , false , false , false , true }}; // transaction_level > 1 + // ... + + // Move abort_status value to R0 and use abort_status register as a + // temporary register because R0 as third operand in ld/std is treated + // as base address zero (value). 
Likewise, R0 as second operand in addi + // is problematic because it amounts to li. + const Register temp_Reg = abort_status; + const Register abort_status_R0 = R0; + mr(abort_status_R0, abort_status); + + // Keep track of offsets added to rtm_counters_Reg to restore it back. int counters_offs = RTMLockingCounters::abort_count_offset(); - addi(addr_Reg, rtm_counters_Reg, counters_offs); - const Register temp_Reg = rtm_counters_Reg; - - //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically - ldx(temp_Reg, addr_Reg); + addi(rtm_counters_Reg, rtm_counters_Reg, counters_offs); + + // Increment total abort counter. + // atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically. + ldx(temp_Reg, rtm_counters_Reg); addi(temp_Reg, temp_Reg, 1); - stdx(temp_Reg, addr_Reg); - + stdx(temp_Reg, rtm_counters_Reg); + + // Increment specific abort counters. if (PrintPreciseRTMLockingStatistics) { - int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs; - - //mftexasr(abort_status); done by caller - for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) { - counters_offs += counters_offs_delta; - li(temp_Reg, counters_offs_delta); // can't use addi with R0 - add(addr_Reg, addr_Reg, temp_Reg); // point to next counter - counters_offs_delta = sizeof(uintx); - - Label check_abort; - rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0); - if (tm_failure_inv[i]) { - bne(CCR0, check_abort); - } else { - beq(CCR0, check_abort); + + int abort_offs; + + abort_offs = RTMLockingCounters::abortX_count_offset() - counters_offs; + addi(rtm_counters_Reg, rtm_counters_Reg, abort_offs); + + // Keep track of offsets added to rtm_counters_Reg. + counters_offs += abort_offs; + + for (int nbit = 0; nbit < num_failure_bits; nbit++) { + for (int ncounter = 0; ncounter < num_counters; ncounter++) { + if (bit2counter_map[nbit][ncounter] == true) { + + Label check_abort; + + // Counter offset based on counter number (counter * 8 bytes). 
+ abort_offs = ncounter << 3; + + if (failure_bit[nbit] == tm_transaction_level) { + // Don't check outer transaction, TL = 1 (bit 63). Hence only + // 11 bits in the TL field are checked to find out if failure + // occurred in a nested transaction. This check also matches + // the case when nesting_of = 1 (nesting overflow). + rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 10); + } else if (failure_bit[nbit] == tm_failure_code) { + // Check failure code for trap or illegal caught in TM. + // Bits 0:7 are tested as bit 7 (persistent) is copied from + // tabort or treclaim source operand. + // On Linux: trap or illegal is TM_CAUSE_SIGNAL (0xD4). + rldicl(temp_Reg, abort_status_R0, 8, 56); + cmpdi(CCR0, temp_Reg, 0xD4); + } else { + rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 0); + } + + if (failure_logic_inv[nbit] == true) { + bne(CCR0, check_abort); + } else { + beq(CCR0, check_abort); + } + + // We don't increment atomically. + ld(temp_Reg, abort_offs, rtm_counters_Reg); + addi(temp_Reg, temp_Reg, 1); + std(temp_Reg, abort_offs, rtm_counters_Reg); + + bind(check_abort); + } } - //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically - ldx(temp_Reg, addr_Reg); - addi(temp_Reg, temp_Reg, 1); - stdx(temp_Reg, addr_Reg); - bind(check_abort); } } - li(temp_Reg, -counters_offs); // can't use addi with R0 - add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore + + // Restore rtm_counters_Reg and abort_status. + addi(rtm_counters_Reg, rtm_counters_Reg, -counters_offs); + mr(abort_status, abort_status_R0); } // Branch if (random & (count-1) != 0), count is 2^n