--- old/src/hotspot/share/opto/macroArrayCopy.cpp 2019-03-11 14:26:48.494354570 +0100
+++ new/src/hotspot/share/opto/macroArrayCopy.cpp 2019-03-11 14:26:48.290354573 +0100
@@ -139,6 +139,11 @@
   return generate_guard(ctrl, test, region, PROB_UNLIKELY_MAG(3));
 }
 
+// Same as generate_guard(), with PROB_FAIR passed as the probability argument.
+inline Node* PhaseMacroExpand::generate_fair_guard(Node** ctrl, Node* test, RegionNode* region) {
+  return generate_guard(ctrl, test, region, PROB_FAIR);
+}
+
 void PhaseMacroExpand::generate_negative_guard(Node** ctrl, Node* index, RegionNode* region) {
   if ((*ctrl)->is_top())
     return;                // already stopped
@@ -184,6 +189,45 @@
   return is_notp;
 }
 
+// Guard on the array tag of obj_or_klass' layout helper being
+// Klass::_lh_array_tag_vt_value; see generate_array_guard().
+Node* PhaseMacroExpand::generate_flattened_array_guard(Node** ctrl, Node* mem, Node* obj_or_klass, RegionNode* region) {
+  return generate_array_guard(ctrl, mem, obj_or_klass, region, Klass::_lh_array_tag_vt_value);
+}
+
+// Guard on the array tag of obj_or_klass' layout helper being
+// Klass::_lh_array_tag_obj_value; see generate_array_guard().
+Node* PhaseMacroExpand::generate_object_array_guard(Node** ctrl, Node* mem, Node* obj_or_klass, RegionNode* region) {
+  return generate_array_guard(ctrl, mem, obj_or_klass, region, Klass::_lh_array_tag_obj_value);
+}
+
+// Builds a PROB_FAIR guard that tests whether the array tag stored in a
+// klass' layout helper equals lh_con.  obj_or_klass is either an oop, whose
+// klass is loaded first, or a klass pointer that is used directly.
+// Returns NULL without emitting anything when *ctrl is already top.
+Node* PhaseMacroExpand::generate_array_guard(Node** ctrl, Node* mem, Node* obj_or_klass, RegionNode* region, jint lh_con) {
+  if ((*ctrl)->is_top()) {
+    return NULL;
+  }
+
+  Node* klass_node = obj_or_klass;
+  if (_igvn.type(obj_or_klass)->isa_oopptr()) {
+    // Got an oop: read its klass.
+    Node* klass_adr = basic_plus_adr(obj_or_klass, oopDesc::klass_offset_in_bytes());
+    klass_node = transform_later(LoadKlassNode::make(_igvn, NULL, C->immutable_memory(), klass_adr, TypeInstPtr::KLASS));
+  } else {
+    assert(_igvn.type(obj_or_klass)->isa_klassptr(), "what else?");
+  }
+
+  // Shift the layout helper down to its array tag and compare with lh_con.
+  Node* lh_val  = make_load(NULL, mem, klass_node, in_bytes(Klass::layout_helper_offset()), TypeInt::INT, T_INT);
+  Node* lh_tag  = transform_later(new RShiftINode(lh_val, intcon(Klass::_lh_array_tag_shift)));
+  Node* tag_cmp = transform_later(new CmpINode(lh_tag, intcon(lh_con)));
+  Node* tag_eq  = transform_later(new BoolNode(tag_cmp, BoolTest::eq));
+
+  return generate_fair_guard(ctrl, tag_eq, region);
+}
+
 void 
PhaseMacroExpand::finish_arraycopy_call(Node* call, Node** ctrl, MergeMemNode** mem, const TypePtr* adr_type) {
   transform_later(call);
 
@@ -236,6 +268,30 @@
   return StubRoutines::select_arraycopy_function(t, aligned, disjoint, name, dest_uninitialized);
 }
 
+// Decides whether generate_arraycopy() may take over the zeroing of a newly
+// allocated destination array.
+//
+// NOTE: this is not a pure predicate.  The last term of the chain below calls
+// alloc->maybe_set_complete(), after which the caller asserts that the
+// allocation's InitializeNode is complete.  Callers must therefore evaluate
+// every other bail-out condition *before* calling this function.
+bool PhaseMacroExpand::can_try_zeroing_elimination(AllocateArrayNode* alloc,
+                                                   Node* src,
+                                                   Node* dest) const {
+  const TypeAryPtr* top_dest = _igvn.type(dest)->isa_aryptr();
+  if (top_dest != NULL && top_dest->klass() == NULL) {
+    // Destination is typed as an array but its klass is not known: give up.
+    return false;
+  }
+
+  return ReduceBulkZeroing
+    && !(UseTLAB && ZeroTLAB) // pointless if already zeroed
+    && !src->eqv_uncast(dest)
+    && alloc != NULL
+    && _igvn.find_int_con(alloc->in(AllocateNode::ALength), 1) > 0
+    && alloc->maybe_set_complete(&_igvn);
+}
+
 #define XTOP LP64_ONLY(COMMA top())
 
 // Generate an optimized call to arraycopy.
@@ -278,6 +334,7 @@
                                            Node* src,  Node* src_offset,
                                            Node* dest, Node* dest_offset,
                                            Node* copy_length,
+                                           Node* dest_length,
                                            bool disjoint_bases,
                                            bool length_never_negative,
                                            RegionNode* slow_region) {
@@ -288,19 +345,16 @@
 
   Node* original_dest      = dest;
   bool  dest_uninitialized = false;
+  Node* default_value = NULL;
+  Node* raw_default_value = NULL;
 
   // See if this is the initialization of a newly-allocated array.
   // If so, we will take responsibility here for initializing it to zero.
   // (Note:  Because tightly_coupled_allocation performs checks on the
   // out-edges of the dest, we need to avoid making derived pointers
   // from it until we have checked its uses.)
-  if (ReduceBulkZeroing
-      && !(UseTLAB && ZeroTLAB) // pointless if already zeroed
-      && basic_elem_type != T_CONFLICT // avoid corner case
-      && !src->eqv_uncast(dest)
-      && alloc != NULL
-      && _igvn.find_int_con(alloc->in(AllocateNode::ALength), 1) > 0
-      && alloc->maybe_set_complete(&_igvn)) {
+  if (basic_elem_type != T_CONFLICT /* avoid corner case; must precede the side-effecting check */ &&
+      can_try_zeroing_elimination(alloc, src, dest)) {
     // "You break it, you buy it."
InitializeNode* init = alloc->initialization(); assert(init->is_complete(), "we just did this"); @@ -313,6 +362,8 @@ // Also, if this flag is set we make sure that arraycopy interacts properly // with G1, eliding pre-barriers. See CR 6627983. dest_uninitialized = true; + default_value = alloc->in(AllocateNode::DefaultValue); + raw_default_value = alloc->in(AllocateNode::RawDefaultValue); } else { // No zeroing elimination here. alloc = NULL; @@ -378,14 +429,15 @@ // copy_length is 0. if (dest_uninitialized) { assert(!local_ctrl->is_top(), "no ctrl?"); - Node* dest_length = alloc->in(AllocateNode::ALength); if (copy_length->eqv_uncast(dest_length) || _igvn.find_int_con(dest_length, 1) <= 0) { // There is no zeroing to do. No need for a secondary raw memory barrier. } else { // Clear the whole thing since there are no source elements to copy. generate_clear_array(local_ctrl, local_mem, - adr_type, dest, basic_elem_type, + adr_type, dest, + default_value, raw_default_value, + basic_elem_type, intcon(0), NULL, alloc->in(AllocateNode::AllocSize)); // Use a secondary InitializeNode as raw memory barrier. @@ -416,13 +468,14 @@ // The copy destination is the slice dest[off..off+len]. The other slices // are dest_head = dest[0..off] and dest_tail = dest[off+len..dest.length]. Node* dest_size = alloc->in(AllocateNode::AllocSize); - Node* dest_length = alloc->in(AllocateNode::ALength); Node* dest_tail = transform_later( new AddINode(dest_offset, copy_length)); // If there is a head section that needs zeroing, do it now. 
if (_igvn.find_int_con(dest_offset, -1) != 0) { generate_clear_array(*ctrl, mem, - adr_type, dest, basic_elem_type, + adr_type, dest, + default_value, raw_default_value, + basic_elem_type, intcon(0), dest_offset, NULL); } @@ -471,7 +524,9 @@ *ctrl = tail_ctl; if (notail_ctl == NULL) { generate_clear_array(*ctrl, mem, - adr_type, dest, basic_elem_type, + adr_type, dest, + default_value, raw_default_value, + basic_elem_type, dest_tail, NULL, dest_size); } else { @@ -481,7 +536,9 @@ done_ctl->init_req(1, notail_ctl); done_mem->init_req(1, mem->memory_at(alias_idx)); generate_clear_array(*ctrl, mem, - adr_type, dest, basic_elem_type, + adr_type, dest, + default_value, raw_default_value, + basic_elem_type, dest_tail, NULL, dest_size); done_ctl->init_req(2, *ctrl); @@ -659,7 +716,9 @@ if (dest_uninitialized) { generate_clear_array(local_ctrl, local_mem, - adr_type, dest, basic_elem_type, + adr_type, dest, + default_value, raw_default_value, + basic_elem_type, intcon(0), NULL, alloc->in(AllocateNode::AllocSize)); } @@ -713,6 +772,12 @@ insert_mem_bar(ctrl, &out_mem, Op_MemBarStoreStore); } else if (InsertMemBarAfterArraycopy) { insert_mem_bar(ctrl, &out_mem, Op_MemBarCPUOrder); + } else if (adr_type == TypeRawPtr::BOTTOM) { + // Do not let reads from the destination float above the arraycopy. + // Since we cannot type the arrays, we don't know which slices + // might be affected. We could restrict this barrier only to those + // memory slices which pertain to array elements--but don't bother. 
+ insert_mem_bar(ctrl, &out_mem, Op_MemBarCPUOrder); } _igvn.replace_node(_memproj_fallthrough, out_mem); @@ -757,6 +822,8 @@ void PhaseMacroExpand::generate_clear_array(Node* ctrl, MergeMemNode* merge_mem, const TypePtr* adr_type, Node* dest, + Node* val, + Node* raw_val, BasicType basic_elem_type, Node* slice_idx, Node* slice_len, @@ -772,6 +839,7 @@ Node* mem = merge_mem->memory_at(alias_idx); // memory slice to operate on // scaling and rounding of indexes: + assert(basic_elem_type != T_VALUETYPE, "should have been converted to a basic type copy"); int scale = exact_log2(type2aelembytes(basic_elem_type)); int abase = arrayOopDesc::base_offset_in_bytes(basic_elem_type); int clear_low = (-1 << scale) & (BytesPerInt - 1); @@ -795,12 +863,12 @@ if (start_con >= 0 && end_con >= 0) { // Constant start and end. Simple. - mem = ClearArrayNode::clear_memory(ctrl, mem, dest, + mem = ClearArrayNode::clear_memory(ctrl, mem, dest, val, raw_val, start_con, end_con, &_igvn); } else if (start_con >= 0 && dest_size != top()) { // Constant start, pre-rounded end after the tail of the array. Node* end = dest_size; - mem = ClearArrayNode::clear_memory(ctrl, mem, dest, + mem = ClearArrayNode::clear_memory(ctrl, mem, dest, val, raw_val, start_con, end, &_igvn); } else if (start_con >= 0 && slice_len != top()) { // Constant start, non-constant end. End needs rounding up. @@ -813,7 +881,7 @@ end_base += end_round; end = transform_later(new AddXNode(end, MakeConX(end_base)) ); end = transform_later(new AndXNode(end, MakeConX(~end_round)) ); - mem = ClearArrayNode::clear_memory(ctrl, mem, dest, + mem = ClearArrayNode::clear_memory(ctrl, mem, dest, val, raw_val, start_con, end, &_igvn); } else if (start_con < 0 && dest_size != top()) { // Non-constant start, pre-rounded end after the tail of the array. 
@@ -842,12 +910,19 @@
         // Store a zero to the immediately preceding jint:
         Node* x1 = transform_later(new AddXNode(start, MakeConX(-bump_bit)) );
         Node* p1 = basic_plus_adr(dest, x1);
-        mem = StoreNode::make(_igvn, ctrl, mem, p1, adr_type, intcon(0), T_INT, MemNode::unordered);
+        if (val == NULL) {
+          assert(raw_val == NULL, "raw_val must be null when val is null");
+          mem = StoreNode::make(_igvn, ctrl, mem, p1, adr_type, intcon(0), T_INT, MemNode::unordered);
+        } else {
+          // A default value was supplied: store it in place of the zero.
+          assert(_igvn.type(val)->isa_narrowoop(), "should be narrow oop");
+          mem = new StoreNNode(ctrl, mem, p1, adr_type, val, MemNode::unordered);
+        }
         mem = transform_later(mem);
       }
     }
     Node* end = dest_size; // pre-rounded
-    mem = ClearArrayNode::clear_memory(ctrl, mem, dest,
+    mem = ClearArrayNode::clear_memory(ctrl, mem, dest, raw_val,
                                        start, end, &_igvn);
   } else {
     // Non-constant start, unrounded non-constant end.
@@ -1082,6 +1157,49 @@
   finish_arraycopy_call(call, ctrl, mem, adr_type);
 }
 
+// Rewrites the parameters of a copy into a value array klass so that it can
+// be carried out as a copy of primitive elements of the same size.
+//
+// top_dest     type of the destination; its klass must be a value array klass
+// src_offset,
+// dest_offset,
+// length       in/out: multiplied by elem_size / 8 when elements are wider
+//              than 8 bytes (the copy is then expressed in T_LONG units)
+// dest_elem    out: T_LONG, T_INT, T_CHAR or T_BYTE, chosen by element size
+// dest_length  in/out: scaled like length; may be NULL
+//
+// Returns the address type to use for the copy, always TypeRawPtr::BOTTOM.
+const TypePtr* PhaseMacroExpand::adjust_parameters_for_vt(const TypeAryPtr* top_dest, Node*& src_offset,
+                                                          Node*& dest_offset, Node*& length, BasicType& dest_elem,
+                                                          Node*& dest_length) {
+  assert(top_dest->klass()->is_value_array_klass(), "inconsistent");
+  int elem_size = ((ciValueArrayKlass*)top_dest->klass())->element_byte_size();
+  if (elem_size >= 8) {
+    if (elem_size > 8) {
+      // treat as array of long but scale length, src offset and dest offset
+      assert((elem_size % 8) == 0, "element size must be a multiple of 8 bytes");
+      int factor = elem_size / 8;
+      length = transform_later(new MulINode(length, intcon(factor)));
+      src_offset = transform_later(new MulINode(src_offset, intcon(factor)));
+      dest_offset = transform_later(new MulINode(dest_offset, intcon(factor)));
+      if (dest_length != NULL) {
+        dest_length = transform_later(new MulINode(dest_length, intcon(factor)));
+      }
+      elem_size = 8;
+    }
+    dest_elem = T_LONG;
+  } else if (elem_size == 4) {
+    dest_elem = T_INT;
+  } else if (elem_size == 2) {
+    dest_elem = T_CHAR;
+  } else if (elem_size == 1) {
+    dest_elem = T_BYTE;
+  } else {
+    ShouldNotReachHere();
+  }
+  return TypeRawPtr::BOTTOM;
+}
+
 void PhaseMacroExpand::expand_arraycopy_node(ArrayCopyNode *ac) {
   Node* ctrl = ac->in(TypeFunc::Control);
   Node* io = ac->in(TypeFunc::I_O);
@@ -1111,26 +1229,44 @@
 
     return;
   } else if (ac->is_copyof() || ac->is_copyofrange() || ac->is_cloneoop()) {
+    const Type* dest_type = _igvn.type(dest);
+    const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+    assert(top_dest != NULL && top_dest->klass() != NULL, "destination must be an array with a known klass");
+
+    BasicType dest_elem = top_dest->klass()->as_array_klass()->element_type()->basic_type();
+    if (dest_elem == T_ARRAY || (dest_elem == T_VALUETYPE && top_dest->klass()->is_obj_array_klass())) {
+      dest_elem = T_OBJECT;
+    }
+
     Node* mem = ac->in(TypeFunc::Memory);
     merge_mem = MergeMemNode::make(mem);
     transform_later(merge_mem);
 
-    RegionNode* slow_region = new RegionNode(1);
-    transform_later(slow_region);
-
     AllocateArrayNode* alloc = NULL;
     if (ac->is_alloc_tightly_coupled()) {
       alloc = AllocateArrayNode::Ideal_array_allocation(dest, &_igvn);
       assert(alloc != NULL, "expect alloc");
     }
+    assert(dest_elem != T_VALUETYPE || alloc != NULL, "unsupported");
+    Node* dest_length = alloc != NULL ? 
alloc->in(AllocateNode::ALength) : NULL; - const TypePtr* adr_type = _igvn.type(dest)->is_oopptr()->add_offset(Type::OffsetBot); - if (ac->_dest_type != TypeOopPtr::BOTTOM) { - adr_type = ac->_dest_type->add_offset(Type::OffsetBot)->is_ptr(); + const TypePtr* adr_type = NULL; + + if (dest_elem == T_VALUETYPE) { + adr_type = adjust_parameters_for_vt(top_dest, src_offset, dest_offset, length, dest_elem, dest_length); + } else { + adr_type = _igvn.type(dest)->is_oopptr()->add_offset(Type::OffsetBot); + if (ac->_dest_type != TypeOopPtr::BOTTOM) { + adr_type = ac->_dest_type->add_offset(Type::OffsetBot)->is_ptr(); + } + if (ac->_src_type != ac->_dest_type) { + adr_type = TypeRawPtr::BOTTOM; + } } generate_arraycopy(ac, alloc, &ctrl, merge_mem, &io, - adr_type, T_OBJECT, + adr_type, dest_elem, src, src_offset, dest, dest_offset, length, + dest_length, true, !ac->is_copyofrange()); return; @@ -1163,8 +1285,12 @@ if (top_src != NULL && top_src->klass() != NULL) { src_elem = top_src->klass()->as_array_klass()->element_type()->basic_type(); } - if (src_elem == T_ARRAY) src_elem = T_OBJECT; - if (dest_elem == T_ARRAY) dest_elem = T_OBJECT; + if (src_elem == T_ARRAY || (src_elem == T_VALUETYPE && top_src->klass()->is_obj_array_klass())) { + src_elem = T_OBJECT; + } + if (dest_elem == T_ARRAY || (dest_elem == T_VALUETYPE && top_dest->klass()->is_obj_array_klass())) { + dest_elem = T_OBJECT; + } if (ac->is_arraycopy_validated() && dest_elem != T_CONFLICT && @@ -1183,22 +1309,21 @@ transform_later(merge_mem); } + RegionNode* slow_region = new RegionNode(1); + transform_later(slow_region); + + generate_flattened_array_guard(&ctrl, merge_mem, dest, slow_region); + // Call StubRoutines::generic_arraycopy stub. Node* mem = generate_arraycopy(ac, NULL, &ctrl, merge_mem, &io, TypeRawPtr::BOTTOM, T_CONFLICT, src, src_offset, dest, dest_offset, length, + NULL, // If a negative length guard was generated for the ArrayCopyNode, // the length of the array can never be negative. 
- false, ac->has_negative_length_guard()); + false, ac->has_negative_length_guard(), + slow_region); - // Do not let reads from the destination float above the arraycopy. - // Since we cannot type the arrays, we don't know which slices - // might be affected. We could restrict this barrier only to those - // memory slices which pertain to array elements--but don't bother. - if (!InsertMemBarAfterArraycopy) { - // (If InsertMemBarAfterArraycopy, there is already one in place.) - insert_mem_bar(&ctrl, &mem, Op_MemBarCPUOrder); - } return; } @@ -1206,6 +1331,11 @@ // (2) src and dest arrays must have elements of the same BasicType // Figure out the size and type of the elements we will be copying. + // + // We have no stub to copy flattened value type arrays with oop + // fields if we need to emit write barriers. + // + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); if (src_elem != dest_elem || dest_elem == T_VOID) { // The component types are not the same or are not recognized. Punt. // (But, avoid the native method wrapper to JVM_ArrayCopy.) @@ -1233,12 +1363,15 @@ // (8) dest_offset + length must not exceed length of dest. // (9) each element of an oop array must be assignable - { - Node* mem = ac->in(TypeFunc::Memory); - merge_mem = MergeMemNode::make(mem); - transform_later(merge_mem); + Node* mem = ac->in(TypeFunc::Memory); + if (dest_elem == T_VALUETYPE) { + // copy modifies more than 1 slice + insert_mem_bar(&ctrl, &mem, Op_MemBarCPUOrder); } + merge_mem = MergeMemNode::make(mem); + transform_later(merge_mem); + RegionNode* slow_region = new RegionNode(1); transform_later(slow_region); @@ -1278,10 +1411,28 @@ // (9) each element of an oop array must be assignable // The generate_arraycopy subroutine checks this. 
+ + if (dest_elem == T_OBJECT && + ValueArrayFlatten && + top_dest->elem()->make_oopptr()->can_be_value_type()) { + generate_flattened_array_guard(&ctrl, merge_mem, dest, slow_region); + } + + if (src_elem == T_OBJECT && + ValueArrayFlatten && + top_src->elem()->make_oopptr()->can_be_value_type()) { + generate_flattened_array_guard(&ctrl, merge_mem, src, slow_region); + } } + // This is where the memory effects are placed: const TypePtr* adr_type = NULL; - if (ac->_dest_type != TypeOopPtr::BOTTOM) { + + Node* dest_length = alloc != NULL ? alloc->in(AllocateNode::ALength) : NULL; + + if (dest_elem == T_VALUETYPE) { + adr_type = adjust_parameters_for_vt(top_dest, src_offset, dest_offset, length, dest_elem, dest_length); + } else if (ac->_dest_type != TypeOopPtr::BOTTOM) { adr_type = ac->_dest_type->add_offset(Type::OffsetBot)->is_ptr(); } else { adr_type = TypeAryPtr::get_array_body_type(dest_elem); @@ -1290,7 +1441,9 @@ generate_arraycopy(ac, alloc, &ctrl, merge_mem, &io, adr_type, dest_elem, src, src_offset, dest, dest_offset, length, + dest_length, // If a negative length guard was generated for the ArrayCopyNode, // the length of the array can never be negative. - false, ac->has_negative_length_guard(), slow_region); + false, ac->has_negative_length_guard(), + slow_region); }