--- old/src/cpu/sparc/vm/vm_version_sparc.cpp 2017-07-24 10:39:03.417027694 +0530 +++ new/src/cpu/sparc/vm/vm_version_sparc.cpp 2017-07-24 10:39:03.248860154 +0530 @@ -140,10 +140,17 @@ if (is_niagara_plus()) { if (has_blk_init() && (cache_line_size > 0) && UseTLAB && FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { - // Use BIS instruction for TLAB allocation prefetch. - FLAG_SET_ERGO(intx, AllocatePrefetchInstr, 1); - if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { - FLAG_SET_ERGO(intx, AllocatePrefetchStyle, 3); + if (!has_sparc5_instr()) { + // Use BIS instruction for TLAB allocation prefetch. + // on Niagara plus processors other than those based on CoreS4 + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1); + } else { + // On CoreS4 processors use prefetch instruction + // to avoid partial RAW issue, also use prefetch style 3 + FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3); + } } if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { // Use smaller prefetch distance with BIS @@ -165,6 +172,11 @@ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); } if (AllocatePrefetchInstr == 1) { + + // Use allocation prefetch style 3 because BIS instructions + // require aligned memory addresses. + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3); + // Need a space at the end of TLAB for BIS since it // will fault when accessing memory outside of heap. --- old/src/share/vm/opto/macro.cpp 2017-07-24 10:39:03.965574197 +0530 +++ new/src/share/vm/opto/macro.cpp 2017-07-24 10:39:03.849458512 +0530 @@ -1775,7 +1775,7 @@ i_o = pf_phi_abio; } else if( UseTLAB && AllocatePrefetchStyle == 3 ) { // Insert a prefetch for each allocation. - // This code is used for Sparc with BIS. + // This code is used to generate 1 prefetch instruction per cache line. Node *pf_region = new (C) RegionNode(3); Node *pf_phi_rawmem = new (C) PhiNode( pf_region, Type::MEMORY, TypeRawPtr::BOTTOM ); @@ -1791,6 +1791,8 @@ transform_later(cache_adr); cache_adr = new (C) CastP2XNode(needgc_false, cache_adr); transform_later(cache_adr); + // Address is aligned to execute prefetch to the beginning of cache line size + // (it is important when BIS instruction is used on SPARC as prefetch). Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1)); cache_adr = new (C) AndXNode(cache_adr, mask); transform_later(cache_adr);