--- old/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp	2016-05-13 15:25:49.046416007 +0300
+++ new/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp	2016-05-13 15:25:48.998416237 +0300
@@ -2563,15 +2563,21 @@
   if (is_64bit) {
     __ cmpxchgd(BOOL_RESULT, /*current_value=*/R0, cmp_value, new_value, addr,
-                MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::MemBarNone,
                 MacroAssembler::cmpxchgx_hint_atomic_update(),
                 noreg, NULL, /*check without ldarx first*/true);
   } else {
     __ cmpxchgw(BOOL_RESULT, /*current_value=*/R0, cmp_value, new_value, addr,
-                MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::MemBarNone,
                 MacroAssembler::cmpxchgx_hint_atomic_update(),
                 noreg, /*check without ldarx first*/true);
   }
+
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+    __ isync();
+  } else {
+    __ sync();
+  }
 }
--- old/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	2016-05-13 15:25:49.238415091 +0300
+++ new/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	2016-05-13 15:25:49.190415321 +0300
@@ -1353,7 +1353,11 @@
     }
   }
 
-  __ membar();
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+    __ membar_acquire();
+  } else {
+    __ membar();
+  }
 }
--- old/src/cpu/ppc/vm/macroAssembler_ppc.cpp	2016-05-13 15:25:49.422414213 +0300
+++ new/src/cpu/ppc/vm/macroAssembler_ppc.cpp	2016-05-13 15:25:49.370414460 +0300
@@ -1404,7 +1404,7 @@
 void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_value,
                               Register compare_value, Register exchange_value,
                               Register addr_base, int semantics, bool cmpxchgx_hint,
-                              Register int_flag_success, bool contention_hint) {
+                              Register int_flag_success, bool contention_hint, bool weak) {
   Label retry;
   Label failed;
   Label done;
@@ -1414,6 +1414,7 @@
   bool use_result_reg    = (int_flag_success != noreg);
   bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
                             int_flag_success != exchange_value && int_flag_success != addr_base);
+  assert(!weak || flag == CCR0, "weak only supported with CCR0");
 
   if (use_result_reg && preset_result_reg) {
     li(int_flag_success, 0); // preset (assume cas failed)
@@ -1445,10 +1446,12 @@
   // fall through    => (flag == eq), (dest_current_value == compare_value)
 
   stwcx_(exchange_value, addr_base);
-  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
-    bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
-  } else {
-    bne(                  CCR0, retry); // StXcx_ sets CCR0.
+  if (!weak || use_result_reg) {
+    if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+      bne_predict_not_taken(CCR0, weak ? failed : retry); // StXcx_ sets CCR0.
+    } else {
+      bne(                  CCR0, weak ? failed : retry); // StXcx_ sets CCR0.
+    }
   }
   // fall through    => (flag == eq), (dest_current_value == compare_value), (swapped)
 
@@ -1498,7 +1501,7 @@
 void MacroAssembler::cmpxchgd(ConditionRegister flag, Register dest_current_value,
                               RegisterOrConstant compare_value, Register exchange_value,
                               Register addr_base, int semantics, bool cmpxchgx_hint,
-                              Register int_flag_success, Label* failed_ext, bool contention_hint) {
+                              Register int_flag_success, Label* failed_ext, bool contention_hint, bool weak) {
   Label retry;
   Label failed_int;
   Label& failed = (failed_ext != NULL) ? *failed_ext : failed_int;
@@ -1508,6 +1511,7 @@
   bool use_result_reg    = (int_flag_success!=noreg);
   bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() &&
                             int_flag_success!=exchange_value && int_flag_success!=addr_base);
+  assert(!weak || flag == CCR0, "weak only supported with CCR0");
   assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
 
   if (use_result_reg && preset_result_reg) {
@@ -1538,10 +1542,12 @@
   }
   stdcx_(exchange_value, addr_base);
-  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
-    bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
-  } else {
-    bne(                  CCR0, retry); // stXcx_ sets CCR0
+  if (!weak || use_result_reg || failed_ext) {
+    if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+      bne_predict_not_taken(CCR0, weak ? failed : retry); // stXcx_ sets CCR0
+    } else {
+      bne(                  CCR0, weak ? failed : retry); // stXcx_ sets CCR0
+    }
   }
 
   // result in register (must do this at the end because int_flag_success can be the same register as one above)
--- old/src/cpu/ppc/vm/macroAssembler_ppc.hpp	2016-05-13 15:25:49.626413237 +0300
+++ new/src/cpu/ppc/vm/macroAssembler_ppc.hpp	2016-05-13 15:25:49.578413467 +0300
@@ -430,11 +430,11 @@
   void cmpxchgw(ConditionRegister flag,
                 Register dest_current_value, Register compare_value, Register exchange_value,
                 Register addr_base, int semantics, bool cmpxchgx_hint = false,
-                Register int_flag_success = noreg, bool contention_hint = false);
+                Register int_flag_success = noreg, bool contention_hint = false, bool weak = false);
   void cmpxchgd(ConditionRegister flag,
                 Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
                 Register addr_base, int semantics, bool cmpxchgx_hint = false,
-                Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false);
+                Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false, bool weak = false);
 
   // interface method calling
   void lookup_interface_method(Register recv_klass,
--- old/src/cpu/ppc/vm/ppc.ad	2016-05-13 15:25:49.806412378 +0300
+++ new/src/cpu/ppc/vm/ppc.ad	2016-05-13 15:25:49.754412627 +0300
@@ -3083,7 +3083,11 @@
       __ bne(                  CCR0, Lretry);
     }
     if (RegCollision) __ subf(Rres, Rsrc, Rtmp);
-    __ fence();
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      __ sync();
+    }
   %}
 
   enc_class enc_GetAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
@@ -3108,7 +3112,11 @@
       __ bne(                  CCR0, Lretry);
     }
     if (RegCollision) __ subf(Rres, Rsrc, Rtmp);
-    __ fence();
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      __ sync();
+    }
   %}
 
   enc_class enc_GetAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
@@ -3132,7 +3140,11 @@
      __ bne(                  CCR0, Lretry);
    }
    if (RegCollision) __ mr(Rres, Rtmp);
-    __ fence();
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      __ sync();
+    }
   %}
 
   enc_class enc_GetAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
@@ -3156,7 +3168,11 @@
      __ bne(                  CCR0, Lretry);
    }
    if (RegCollision) __ mr(Rres, Rtmp);
-    __ fence();
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      __ sync();
+    }
   %}
 
   // This enc_class is needed so that scheduler gets proper
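Note on the barrier change above: a sequentially consistent update on POWER is a leading sync, the larx/stcx. retry loop, and a trailing barrier. The patch removes the trailing full fence from the cmpxchg itself (MemBarFenceAfter becomes MemBarNone) and emits it explicitly afterwards: isync when support_IRIW_for_not_multiple_copy_atomic_cpu is set, because volatile loads then carry their own leading sync and acquire ordering suffices, and a full sync otherwise. A standalone sketch of that shape, using GCC inline assembly for POWER; this is an illustration with an invented helper name, not HotSpot code:

    // Sketch only: the instruction shape generated for a strong seq-cst CAS.
    static bool cas_seq_cst(volatile int* addr, int cmp, int val, bool iriw_mode) {
      int old;
      __asm__ __volatile__(
        "sync               \n\t"  // leading full barrier (the generated code emits this before the cmpxchg)
        "1: lwarx  %0,0,%1  \n\t"  // load-reserve the current value
        "   cmpw   %0,%2    \n\t"
        "   bne-   2f       \n\t"  // compare failed
        "   stwcx. %3,0,%1  \n\t"  // store-conditional the exchange value
        "   bne-   1b       \n\t"  // reservation lost -> retry (the strong flavor keeps this branch)
        "2:                 \n\t"
        : "=&r"(old)
        : "r"(addr), "r"(cmp), "r"(val)
        : "cr0", "memory");
      if (iriw_mode) {
        __asm__ __volatile__("isync" ::: "memory");  // acquire is enough: volatile loads lead with sync
      } else {
        __asm__ __volatile__("sync"  ::: "memory");  // otherwise keep a trailing full fence
      }
      return old == cmp;
    }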
@@ -7553,6 +7569,8 @@
 // (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))" cannot be
 // matched.
 
+// Strong versions:
+
 instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
   effect(TEMP cr0);
@@ -7562,8 +7580,13 @@
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
-                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
                 $res$$Register, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      __ sync();
+    }
   %}
   ins_pipe(pipe_class_default);
 %}
@@ -7577,8 +7600,13 @@
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
     __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
-                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
                 $res$$Register, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      __ sync();
+    }
   %}
   ins_pipe(pipe_class_default);
 %}
@@ -7592,8 +7620,13 @@
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
-                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
                 $res$$Register, NULL, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      __ sync();
+    }
   %}
   ins_pipe(pipe_class_default);
 %}
@@ -7607,12 +7640,312 @@
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
     __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
-                MacroAssembler::MemBarFenceAfter, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
                 $res$$Register, NULL, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
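Note before the weak variants below: a weak CAS may fail spuriously (for instance when the larx reservation is lost), which is what allows the new weak flag to drop the stwcx. retry branch — the retry loop already exists at the call site. The contract matches C++11 compare_exchange_weak; a minimal standalone analogy, not HotSpot code:

    #include <atomic>
    #include <cstdio>

    int main() {
      std::atomic<int> counter{0};
      int expected = counter.load(std::memory_order_relaxed);
      // compare_exchange_weak may fail even when 'expected' matches, so the
      // caller loops; the generated weak CAS can therefore skip its own retry.
      while (!counter.compare_exchange_weak(expected, expected + 1,
                                            std::memory_order_relaxed)) {
        // 'expected' now holds the freshly loaded value; just try again.
      }
      printf("counter = %d\n", counter.load(std::memory_order_relaxed));
      return 0;
    }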
+
+// Weak versions:
+
+instruct weakCompareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+  effect(TEMP cr0);
+  format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+  effect(TEMP cr0);
+  format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
+    // value is never passed to caller.
+    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+  effect(TEMP cr0);
+  format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapN_acq_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+  effect(TEMP cr0);
+  format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
+    // value is never passed to caller.
+    __ cmpxchgw(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+  effect(TEMP cr0);
+  format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapL_acq_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+  effect(TEMP cr0);
+  format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
+    // value is never passed to caller.
+    __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+  effect(TEMP cr0);
+  format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
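Note on the _acq flavors (the next instruct completes the pattern): the predicate dispatches on ((CompareAndSwapNode*)n)->order(), and the acquire barrier is attached to the successful path only — a failing weak CAS returns no value, so there is nothing to order. The leading release fence, when the order calls for one, is a separate MemBar node inserted by library_call.cpp (see the last hunks), which is why the non-acq instructs can pass MemBarNone. In C++11 terms the two flavors roughly correspond to this pair (standalone analogy with invented function names, not HotSpot code):

    #include <atomic>

    // Non-acq instruct: no ordering promised on either path.
    bool weak_cas_plain(std::atomic<long>& a, long& expected, long desired) {
      return a.compare_exchange_weak(expected, desired,
                                     std::memory_order_relaxed,
                                     std::memory_order_relaxed);
    }

    // _acq instruct: acquire is promised on success only; the failure order
    // stays relaxed, matching the "only needed in successful case" comment.
    bool weak_cas_acquire(std::atomic<long>& a, long& expected, long desired) {
      return a.compare_exchange_weak(expected, desired,
                                     std::memory_order_acquire,
                                     std::memory_order_relaxed);
    }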
+
+instruct weakCompareAndSwapP_acq_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+  effect(TEMP cr0);
+  format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    // Acquire only needed in successful case. Weak node is allowed to report unsuccessful in additional rare cases and
+    // value is never passed to caller.
+    __ cmpxchgd(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, NULL, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// CompareAndExchange
+
+instruct compareAndExchangeI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as int" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as int" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
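Note on the CompareAndExchange* family just opened: unlike the bool-valued nodes above, these return the value observed at the address. Hence the cmpxchg targets $res as dest_current_value and passes noreg for int_flag_success, and TEMP_DEF res keeps the result register disjoint from the inputs. Standalone analogy (not HotSpot code; function name invented):

    #include <atomic>

    // compare_exchange_strong writes the witnessed value back into 'expected',
    // so returning it reproduces the CompareAndExchange contract exactly.
    long compare_and_exchange(std::atomic<long>& a, long expected, long desired) {
      a.compare_exchange_strong(expected, desired, std::memory_order_seq_cst);
      return expected;
    }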
+
+instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeN_acq_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgw(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+      __ sync();
+    }
   %}
   ins_pipe(pipe_class_default);
 %}
 
+instruct compareAndExchangeL_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as long" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, NULL, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeL_acq_regP_regL_regL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeL mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as long" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, NULL, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeP_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, NULL, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeP_acq_regP_regP_regP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeP mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
+  // Variable size: instruction count smaller if regs are disjoint.
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
+    __ cmpxchgd(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, NULL, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Special RMW
+
 instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndAddI mem_ptr src));
   effect(TEMP cr0);
--- old/src/share/vm/opto/library_call.cpp	2016-05-13 15:25:50.070411118 +0300
+++ new/src/share/vm/opto/library_call.cpp	2016-05-13 15:25:50.014411386 +0300
@@ -2697,6 +2697,7 @@
     assert(sig->type_at(0)->basic_type() == T_OBJECT, "get and set base is object");
     assert(sig->type_at(1)->basic_type() == T_LONG, "get and set offset is long");
     assert(sig->type_at(2)->basic_type() == type, "get and set must take expected type as new value/delta");
+    assert(access_kind == Volatile, "mo is not passed to intrinsic nodes in current implementation");
 #endif // ASSERT
     break;
   }
@@ -2822,9 +2823,15 @@
     case Acquire:
       break;
     case Release:
-    case Volatile:
       insert_mem_bar(Op_MemBarRelease);
       break;
+    case Volatile:
+      if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+        insert_mem_bar(Op_MemBarVolatile);
+      } else {
+        insert_mem_bar(Op_MemBarRelease);
+      }
+      break;
     default:
       ShouldNotReachHere();
   }
@@ -3035,6 +3042,7 @@
     case Acquire:
     case Volatile:
       insert_mem_bar(Op_MemBarAcquire);
+      // !support_IRIW_for_not_multiple_copy_atomic_cpu handled in platform code
       break;
     default:
       ShouldNotReachHere();
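Note on the library_call.cpp hunks: the intrinsic keeps the ordering decision in the IR — a barrier node before the atomic op (MemBarRelease, or MemBarVolatile for seq-cst updates when support_IRIW_for_not_multiple_copy_atomic_cpu is set) and MemBarAcquire after it; whether that trailing acquire becomes isync or a full sync is then the matcher's choice, as the added comment notes. A compact restatement of the patched mapping (standalone sketch with invented enum/function names, not the HotSpot ones):

    enum AccessKind { Relaxed, Acquire, Release, Volatile };

    // Barrier inserted before the atomic update node.
    const char* pre_barrier(AccessKind kind, bool iriw) {
      switch (kind) {
        case Release:  return "MemBarRelease";
        case Volatile: return iriw ? "MemBarVolatile"  // full fence keeps IRIW on non-multiple-copy-atomic CPUs
                                   : "MemBarRelease";
        default:       return "none";                  // Relaxed, Acquire
      }
    }

    // Barrier inserted after the atomic update node.
    const char* post_barrier(AccessKind kind) {
      switch (kind) {
        case Acquire:
        case Volatile: return "MemBarAcquire";         // platform code decides between isync and sync
        default:       return "none";                  // Relaxed, Release
      }
    }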