src/share/vm/opto/library_call.cpp

rev 7589 : 6700100: optimize inline_native_clone() for small objects with exact klass
Summary: optimize small instance clones as loads/stores
Reviewed-by:
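
For orientation, a minimal sketch of what the change aims for (plain C++, not HotSpot code; the Point type and clone_point helper are invented for this example): when the object's exact klass is known and it has only a few fields, the clone can compile down to an allocation plus a fixed sequence of field loads and stores, with no out-of-line copy call.

    #include <cstdint>

    // Hypothetical example type: small, with an exactly-known layout.
    struct Point { int32_t x; int32_t y; };

    // What the optimized clone amounts to: allocate, then copy each
    // field with plain loads/stores -- no runtime copy routine.
    Point* clone_point(const Point* src) {
      Point* dst = new Point();
      dst->x = src->x;
      dst->y = src->y;
      return dst;
    }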

Old version (before the change):

4458       base_off += sizeof(int);
4459     } else {
4460       // Include klass to copy by 8 bytes words.
4461       base_off = instanceOopDesc::klass_offset_in_bytes();
4462     }
4463     assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment");
4464   }
4465   src  = basic_plus_adr(src,  base_off);
4466   dest = basic_plus_adr(dest, base_off);
4467 
4468   // Compute the length also, if needed:
4469   Node* countx = size;
4470   countx = _gvn.transform(new SubXNode(countx, MakeConX(base_off)));
4471   countx = _gvn.transform(new URShiftXNode(countx, intcon(LogBytesPerLong) ));
4472 
4473   const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
4474 
4475   ArrayCopyNode* ac = ArrayCopyNode::make(this, false, src, NULL, dest, NULL, countx, false);
4476   ac->set_clonebasic();
4477   Node* n = _gvn.transform(ac);
4478   assert(n == ac, "cannot disappear");
4479   set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);



4480 
4481   // If necessary, emit some card marks afterwards.  (Non-arrays only.)
4482   if (card_mark) {
4483     assert(!is_array, "");
4484     // Put in store barrier for any and all oops we are sticking
4485     // into this object.  (We could avoid this if we could prove
4486     // that the object type contains no oop fields at all.)
4487     Node* no_particular_value = NULL;
4488     Node* no_particular_field = NULL;
4489     int raw_adr_idx = Compile::AliasIdxRaw;
4490     post_barrier(control(),
4491                  memory(raw_adr_type),
4492                  alloc_obj,
4493                  no_particular_field,
4494                  raw_adr_idx,
4495                  no_particular_value,
4496                  T_OBJECT,
4497                  false);
4498   }
4499 
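The countx computation in the hunk above reduces to (size - base_off) >> LogBytesPerLong. A standalone sketch of that arithmetic, assuming 8-byte words (the clone_word_count helper is invented; the constants mirror the VM's BytesPerLong and LogBytesPerLong):

    #include <cassert>
    #include <cstddef>

    static const std::size_t BytesPerLong    = 8;
    static const std::size_t LogBytesPerLong = 3;

    // Number of 8-byte words to copy, starting at base_off within an
    // object of size_in_bytes. Mirrors: (size - base_off) >> 3.
    std::size_t clone_word_count(std::size_t size_in_bytes, std::size_t base_off) {
      assert(base_off % BytesPerLong == 0 && "expect 8-byte alignment");
      return (size_in_bytes - base_off) >> LogBytesPerLong;
    }

For example, a 24-byte instance copied from offset 8 yields (24 - 8) >> 3 = 2 words.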


4524 // Allocation has two cases, and uses GraphKit::new_instance or new_array.
4525 //
4526 // Copying also has two cases, oop arrays and everything else.
4527 // Oop arrays use arrayof_oop_arraycopy (same as System.arraycopy).
4528 // Everything else uses the tight inline loop supplied by CopyArrayNode.
4529 //
4530 // These steps fold up nicely if and when the cloned object's klass
4531 // can be sharply typed as an object array, a type array, or an instance.
4532 //
4533 bool LibraryCallKit::inline_native_clone(bool is_virtual) {
4534   PhiNode* result_val;
4535 
4536   // Set the reexecute bit for the interpreter to reexecute
4537   // the bytecode that invokes Object.clone if deoptimization happens.
4538   { PreserveReexecuteState preexecs(this);
4539     jvms()->set_should_reexecute(true);
4540 
4541     Node* obj = null_check_receiver();
4542     if (stopped())  return true;
4543 

4544     Node* obj_klass = load_object_klass(obj);
4545     const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr();
4546     const TypeOopPtr*   toop   = ((tklass != NULL)
4547                                 ? tklass->as_instance_type()
4548                                 : TypeInstPtr::NOTNULL);
4549 
4550     // Conservatively insert a memory barrier on all memory slices.
4551     // Do not let writes into the original float below the clone.
4552     insert_mem_bar(Op_MemBarCPUOrder);
4553 
4554     // paths into result_reg:
4555     enum {
4556       _slow_path = 1,     // out-of-line call to clone method (virtual or not)
4557       _objArray_path,     // plain array allocation, plus arrayof_oop_arraycopy
4558       _array_path,        // plain array allocation, plus arrayof_long_arraycopy
4559       _instance_path,     // plain instance allocation, plus arrayof_long_arraycopy
4560       PATH_LIMIT
4561     };
4562     RegionNode* result_reg = new RegionNode(PATH_LIMIT);
4563     result_val             = new PhiNode(result_reg, TypeInstPtr::NOTNULL);


4726     JVMState* old_jvms = alloc->jvms();
4727     JVMState* jvms = old_jvms->clone_shallow(C);
4728     uint size = alloc->req();
4729     sfpt = new SafePointNode(size, jvms);
4730     jvms->set_map(sfpt);
4731     for (uint i = 0; i < size; i++) {
4732       sfpt->init_req(i, alloc->in(i));
4733     }
4734     // re-push array length for deoptimization
4735     sfpt->ins_req(jvms->stkoff() + jvms->sp(), alloc->in(AllocateNode::ALength));
4736     jvms->set_sp(jvms->sp()+1);
4737     jvms->set_monoff(jvms->monoff()+1);
4738     jvms->set_scloff(jvms->scloff()+1);
4739     jvms->set_endoff(jvms->endoff()+1);
4740     jvms->set_should_reexecute(true);
4741 
4742     sfpt->set_i_o(map()->i_o());
4743     sfpt->set_memory(map()->memory());
4744   }
4745 
4746   bool notest = false;
4747 
4748   const Type* src_type  = _gvn.type(src);
4749   const Type* dest_type = _gvn.type(dest);
4750   const TypeAryPtr* top_src  = src_type->isa_aryptr();
4751   const TypeAryPtr* top_dest = dest_type->isa_aryptr();
4752 
4753   // Do we have the type of src?
4754   bool has_src = (top_src != NULL && top_src->klass() != NULL);
4755   // Do we have the type of dest?
4756   bool has_dest = (top_dest != NULL && top_dest->klass() != NULL);
4757   // Is the type for src from speculation?
4758   bool src_spec = false;
4759   // Is the type for dest from speculation?
4760   bool dest_spec = false;
4761 
4762   if (!has_src || !has_dest) {
4763     // We don't have sufficient type information, let's see if
4764     // speculative types can help. We need to have types for both src
4765     // and dest so that it pays off.
4766 


4830       if (!dest_spec) {
4831         dest_k = dest_type->speculative_type_not_null();
4832         if (dest_k != NULL && dest_k->is_array_klass()) {
4833           could_have_dest = true;
4834         }
4835       }
4836       if (could_have_src && could_have_dest) {
4837         // If we can have both exact types, emit the missing guards
4838         if (could_have_src && !src_spec) {
4839           src = maybe_cast_profiled_obj(src, src_k, true, sfpt);
4840         }
4841         if (could_have_dest && !dest_spec) {
4842           dest = maybe_cast_profiled_obj(dest, dest_k, true);
4843         }
4844       }
4845     }
4846   }
4847 
4848   if (!too_many_traps(Deoptimization::Reason_intrinsic) && !src->is_top() && !dest->is_top()) {
4849     // validate arguments: enables transformation of the ArrayCopyNode
4850     notest = true;
4851 
4852     RegionNode* slow_region = new RegionNode(1);
4853     record_for_igvn(slow_region);
4854 
4855     // (1) src and dest are arrays.
4856     generate_non_array_guard(load_object_klass(src), slow_region);
4857     generate_non_array_guard(load_object_klass(dest), slow_region);
4858 
4859     // (2) src and dest arrays must have elements of the same BasicType
4860     // done at macro expansion or at Ideal transformation time
4861 
4862     // (4) src_offset must not be negative.
4863     generate_negative_guard(src_offset, slow_region);
4864 
4865     // (5) dest_offset must not be negative.
4866     generate_negative_guard(dest_offset, slow_region);
4867 
4868     // (7) src_offset + length must not exceed length of src.
4869     generate_limit_guard(src_offset, length,
4870                          load_array_length(src),


4905     } else {
4906       PreserveJVMState pjvms(this);
4907       set_control(_gvn.transform(slow_region));
4908       uncommon_trap(Deoptimization::Reason_intrinsic,
4909                     Deoptimization::Action_make_not_entrant);
4910       assert(stopped(), "Should be stopped");
4911     }
4912   }
4913 
4914   if (stopped()) {
4915     return true;
4916   }
4917 
4918   ArrayCopyNode* ac = ArrayCopyNode::make(this, true, src, src_offset, dest, dest_offset, length, alloc != NULL,
4919                                           // Create LoadRange and LoadKlass nodes for use during macro expansion here
4920                                           // so the compiler has a chance to eliminate them: during macro expansion,
4921                                           // we have to set their control (CastPP nodes are eliminated).
4922                                           load_object_klass(src), load_object_klass(dest),
4923                                           load_array_length(src), load_array_length(dest));
4924 
4925   if (notest) {
4926     ac->set_arraycopy_notest();
4927   }
4928 
4929   Node* n = _gvn.transform(ac);
4930   assert(n == ac, "cannot disappear");
4931   ac->connect_outputs(this);

4932 
4933   return true;
4934 }
4935 
4936 
4937 // Helper function which determines if an arraycopy immediately follows
4938 // an allocation, with no intervening tests or other escapes for the object.
4939 AllocateArrayNode*
4940 LibraryCallKit::tightly_coupled_allocation(Node* ptr,
4941                                            RegionNode* slow_region) {
4942   if (stopped())             return NULL;  // no fast path
4943   if (C->AliasLevel() == 0)  return NULL;  // no MergeMems around
4944 
4945   AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(ptr, &_gvn);
4946   if (alloc == NULL)  return NULL;
4947 
4948   Node* rawmem = memory(Compile::AliasIdxRaw);
4949   // Is the allocation's memory state untouched?
4950   if (!(rawmem->is_Proj() && rawmem->in(0)->is_Initialize())) {
4951     // Bail out if there have been raw-memory effects since the allocation.


New version (after the change):

4458       base_off += sizeof(int);
4459     } else {
4460       // Include klass to copy by 8-byte words.
4461       base_off = instanceOopDesc::klass_offset_in_bytes();
4462     }
4463     assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment");
4464   }
4465   src  = basic_plus_adr(src,  base_off);
4466   dest = basic_plus_adr(dest, base_off);
4467 
4468   // Compute the length also, if needed:
4469   Node* countx = size;
4470   countx = _gvn.transform(new SubXNode(countx, MakeConX(base_off)));
4471   countx = _gvn.transform(new URShiftXNode(countx, intcon(LogBytesPerLong) ));
4472 
4473   const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
4474 
4475   ArrayCopyNode* ac = ArrayCopyNode::make(this, false, src, NULL, dest, NULL, countx, false);
4476   ac->set_clonebasic();
4477   Node* n = _gvn.transform(ac);
4478   if (n == ac) {
4479     set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
4480   } else {
4481     set_all_memory(n);
4482   }
4483 
4484   // If necessary, emit some card marks afterwards.  (Non-arrays only.)
4485   if (card_mark) {
4486     assert(!is_array, "");
4487     // Put in store barrier for any and all oops we are sticking
4488     // into this object.  (We could avoid this if we could prove
4489     // that the object type contains no oop fields at all.)
4490     Node* no_particular_value = NULL;
4491     Node* no_particular_field = NULL;
4492     int raw_adr_idx = Compile::AliasIdxRaw;
4493     post_barrier(control(),
4494                  memory(raw_adr_type),
4495                  alloc_obj,
4496                  no_particular_field,
4497                  raw_adr_idx,
4498                  no_particular_value,
4499                  T_OBJECT,
4500                  false);
4501   }
4502 
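The post_barrier call above dirties the card covering the new object so a later collection rescans it for the oops just copied in. A hedged sketch of the classic card-table post-barrier (512-byte cards and a dirty value of 0 are typical of HotSpot's card table, but this helper and its names are illustrative, not the VM's actual emission):

    #include <cstdint>

    static const int     kCardShift = 9;  // 512-byte cards (typical)
    static const uint8_t kDirtyCard = 0;  // illustrative dirty value

    // Mark the card covering obj_addr so the collector rescans it.
    void post_barrier_sketch(uint8_t* byte_map_base, uintptr_t obj_addr) {
      byte_map_base[obj_addr >> kCardShift] = kDirtyCard;
    }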


4527 // Allocation has two cases, and uses GraphKit::new_instance or new_array.
4528 //
4529 // Copying also has two cases, oop arrays and everything else.
4530 // Oop arrays use arrayof_oop_arraycopy (same as System.arraycopy).
4531 // Everything else uses the tight inline loop supplied by CopyArrayNode.
4532 //
4533 // These steps fold up nicely if and when the cloned object's klass
4534 // can be sharply typed as an object array, a type array, or an instance.
4535 //
4536 bool LibraryCallKit::inline_native_clone(bool is_virtual) {
4537   PhiNode* result_val;
4538 
4539   // Set the reexecute bit for the interpreter to reexecute
4540   // the bytecode that invokes Object.clone if deoptimization happens.
4541   { PreserveReexecuteState preexecs(this);
4542     jvms()->set_should_reexecute(true);
4543 
4544     Node* obj = null_check_receiver();
4545     if (stopped())  return true;
4546 
4547     const TypeOopPtr* obj_type = _gvn.type(obj)->is_oopptr();
4548 
4549     // If we are going to clone an instance, we need its exact type to
4550     // know the number and types of fields to convert the clone to
4551     // loads/stores. Maybe a speculative type can help us.
4552     if (!obj_type->klass_is_exact() &&
4553         obj_type->speculative_type() != NULL &&
4554         obj_type->speculative_type()->is_instance_klass()) {
4555       ciInstanceKlass* spec_ik = obj_type->speculative_type()->as_instance_klass();
4556       if (spec_ik->nof_nonstatic_fields() <= ArrayCopyLoadStoreMaxElem &&
4557           !spec_ik->has_injected_fields()) {
4558         ciKlass* k = obj_type->klass();
4559         if (!k->is_instance_klass() ||
4560             k->as_instance_klass()->is_interface() ||
4561             k->as_instance_klass()->has_subklass()) {
4562           obj = maybe_cast_profiled_obj(obj, obj_type->speculative_type(), false);
4563         }
4564       }
4565     }
4566 
4567     Node* obj_klass = load_object_klass(obj);
4568     const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr();
4569     const TypeOopPtr*   toop   = ((tklass != NULL)
4570                                 ? tklass->as_instance_type()
4571                                 : TypeInstPtr::NOTNULL);
4572 
4573     // Conservatively insert a memory barrier on all memory slices.
4574     // Do not let writes into the original float below the clone.
4575     insert_mem_bar(Op_MemBarCPUOrder);
4576 
4577     // paths into result_reg:
4578     enum {
4579       _slow_path = 1,     // out-of-line call to clone method (virtual or not)
4580       _objArray_path,     // plain array allocation, plus arrayof_oop_arraycopy
4581       _array_path,        // plain array allocation, plus arrayof_long_arraycopy
4582       _instance_path,     // plain instance allocation, plus arrayof_long_arraycopy
4583       PATH_LIMIT
4584     };
4585     RegionNode* result_reg = new RegionNode(PATH_LIMIT);
4586     result_val             = new PhiNode(result_reg, TypeInstPtr::NOTNULL);
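
The speculative-type block near the top of inline_native_clone above is the heart of this change. Restated as a standalone predicate, a sketch of when the cast to the speculative type fires (all parameter names are invented; ArrayCopyLoadStoreMaxElem is the flag the real code consults):

    // Cast obj to its profiled (speculative) klass only when that klass
    // is a small, concrete instance klass and the static type alone is
    // not already tight enough for a loads/stores clone.
    bool should_cast_to_speculative(bool static_type_exact,
                                    bool spec_is_instance_klass,
                                    int  spec_nof_fields,
                                    int  max_elem,  // ArrayCopyLoadStoreMaxElem
                                    bool spec_has_injected_fields,
                                    bool static_type_loose) {
      if (static_type_exact || !spec_is_instance_klass)           return false;
      if (spec_nof_fields > max_elem || spec_has_injected_fields) return false;
      return static_type_loose;  // not an instance klass, an interface,
                                 // or has subklasses
    }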


4749     JVMState* old_jvms = alloc->jvms();
4750     JVMState* jvms = old_jvms->clone_shallow(C);
4751     uint size = alloc->req();
4752     sfpt = new SafePointNode(size, jvms);
4753     jvms->set_map(sfpt);
4754     for (uint i = 0; i < size; i++) {
4755       sfpt->init_req(i, alloc->in(i));
4756     }
4757     // re-push array length for deoptimization
4758     sfpt->ins_req(jvms->stkoff() + jvms->sp(), alloc->in(AllocateNode::ALength));
4759     jvms->set_sp(jvms->sp()+1);
4760     jvms->set_monoff(jvms->monoff()+1);
4761     jvms->set_scloff(jvms->scloff()+1);
4762     jvms->set_endoff(jvms->endoff()+1);
4763     jvms->set_should_reexecute(true);
4764 
4765     sfpt->set_i_o(map()->i_o());
4766     sfpt->set_memory(map()->memory());
4767   }
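
The offset adjustments above follow from how a JVMState lays out a SafePointNode's inputs: locals, then the expression stack, then monitors, then scalar-replaced objects. Re-pushing the array length inserts one stack slot, so every later section shifts by one. A tiny illustrative mirror of that bookkeeping (the struct and function are invented for this example):

    // Hypothetical mirror of the JVMState offsets touched above.
    struct JvmsOffsets {
      unsigned sp;      // expression stack depth
      unsigned monoff;  // start of monitors
      unsigned scloff;  // start of scalar-replaced objects
      unsigned endoff;  // one past the last input
    };

    // One extra value on the expression stack moves every later
    // section up by one slot.
    void push_one_stack_slot(JvmsOffsets& s) {
      s.sp++; s.monoff++; s.scloff++; s.endoff++;
    }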
4768 
4769   bool validated = false;
4770 
4771   const Type* src_type  = _gvn.type(src);
4772   const Type* dest_type = _gvn.type(dest);
4773   const TypeAryPtr* top_src  = src_type->isa_aryptr();
4774   const TypeAryPtr* top_dest = dest_type->isa_aryptr();
4775 
4776   // Do we have the type of src?
4777   bool has_src = (top_src != NULL && top_src->klass() != NULL);
4778   // Do we have the type of dest?
4779   bool has_dest = (top_dest != NULL && top_dest->klass() != NULL);
4780   // Is the type for src from speculation?
4781   bool src_spec = false;
4782   // Is the type for dest from speculation?
4783   bool dest_spec = false;
4784 
4785   if (!has_src || !has_dest) {
4786     // We don't have sufficient type information, let's see if
4787     // speculative types can help. We need to have types for both src
4788     // and dest so that it pays off.
4789 


4853       if (!dest_spec) {
4854         dest_k = dest_type->speculative_type_not_null();
4855         if (dest_k != NULL && dest_k->is_array_klass()) {
4856           could_have_dest = true;
4857         }
4858       }
4859       if (could_have_src && could_have_dest) {
4860         // If we can have both exact types, emit the missing guards
4861         if (could_have_src && !src_spec) {
4862           src = maybe_cast_profiled_obj(src, src_k, true, sfpt);
4863         }
4864         if (could_have_dest && !dest_spec) {
4865           dest = maybe_cast_profiled_obj(dest, dest_k, true);
4866         }
4867       }
4868     }
4869   }
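
maybe_cast_profiled_obj, used above for both src and dest, emits a klass check that deoptimizes on mismatch, so the code after the guard may assume the profiled type. A hedged standalone sketch of that guard shape (the types and helpers here are invented):

    #include <cstdlib>

    struct Klass;
    struct Oop { const Klass* klass; };

    [[noreturn]] void uncommon_trap() { std::abort(); }  // stands in for deopt

    // Guarded narrowing: past the check, obj is known to be exactly spec_k.
    const Oop* guarded_cast(const Oop* obj, const Klass* spec_k) {
      if (obj->klass != spec_k) uncommon_trap();
      return obj;
    }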
4870 
4871   if (!too_many_traps(Deoptimization::Reason_intrinsic) && !src->is_top() && !dest->is_top()) {
4872     // validate arguments: enables transformation of the ArrayCopyNode
4873     validated = true;
4874 
4875     RegionNode* slow_region = new RegionNode(1);
4876     record_for_igvn(slow_region);
4877 
4878     // (1) src and dest are arrays.
4879     generate_non_array_guard(load_object_klass(src), slow_region);
4880     generate_non_array_guard(load_object_klass(dest), slow_region);
4881 
4882     // (2) src and dest arrays must have elements of the same BasicType
4883     // done at macro expansion or at Ideal transformation time
4884 
4885     // (4) src_offset must not be negative.
4886     generate_negative_guard(src_offset, slow_region);
4887 
4888     // (5) dest_offset must not be negative.
4889     generate_negative_guard(dest_offset, slow_region);
4890 
4891     // (7) src_offset + length must not exceed length of src.
4892     generate_limit_guard(src_offset, length,
4893                          load_array_length(src),


4928     } else {
4929       PreserveJVMState pjvms(this);
4930       set_control(_gvn.transform(slow_region));
4931       uncommon_trap(Deoptimization::Reason_intrinsic,
4932                     Deoptimization::Action_make_not_entrant);
4933       assert(stopped(), "Should be stopped");
4934     }
4935   }
4936 
4937   if (stopped()) {
4938     return true;
4939   }
4940 
4941   ArrayCopyNode* ac = ArrayCopyNode::make(this, true, src, src_offset, dest, dest_offset, length, alloc != NULL,
4942                                           // Create LoadRange and LoadKlass nodes for use during macro expansion here
4943                                           // so the compiler has a chance to eliminate them: during macro expansion,
4944                                           // we have to set their control (CastPP nodes are eliminated).
4945                                           load_object_klass(src), load_object_klass(dest),
4946                                           load_array_length(src), load_array_length(dest));
4947 
4948   ac->set_arraycopy(validated);


4949 
4950   Node* n = _gvn.transform(ac);
4951   if (n == ac) {
4952     ac->connect_outputs(this);
4953   } else {
4954     assert(validated, "shouldn't transform if all arguments not validated");
4955     set_all_memory(n);
4956   }
4957 
4958   return true;
4959 }
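
When `validated` is set, the guards above have already proven the arguments safe, so macro expansion of the ArrayCopyNode can emit the copy without re-checking. A loose illustration of that division of labor (the function is invented; std::memcpy stands in for the generated copy loop):

    #include <cstddef>
    #include <cstring>

    // If the caller has not validated the arguments, decline and leave
    // the copy to a checked runtime call; otherwise copy directly.
    bool expand_copy(void* dst, const void* src, std::size_t bytes, bool validated) {
      if (!validated) return false;
      std::memcpy(dst, src, bytes);  // unchecked fast-path copy
      return true;
    }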
4960 
4961 
4962 // Helper function which determines if an arraycopy immediately follows
4963 // an allocation, with no intervening tests or other escapes for the object.
4964 AllocateArrayNode*
4965 LibraryCallKit::tightly_coupled_allocation(Node* ptr,
4966                                            RegionNode* slow_region) {
4967   if (stopped())             return NULL;  // no fast path
4968   if (C->AliasLevel() == 0)  return NULL;  // no MergeMems around
4969 
4970   AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(ptr, &_gvn);
4971   if (alloc == NULL)  return NULL;
4972 
4973   Node* rawmem = memory(Compile::AliasIdxRaw);
4974   // Is the allocation's memory state untouched?
4975   if (!(rawmem->is_Proj() && rawmem->in(0)->is_Initialize())) {
4976     // Bail out if there have been raw-memory effects since the allocation.

