--- old/src/share/vm/opto/compile.cpp	2017-05-29 18:07:44.425025536 +0200
+++ new/src/share/vm/opto/compile.cpp	2017-05-29 18:07:44.338025630 +0200
@@ -2681,6 +2681,92 @@
   }
 }
 
+void Compile::value_type_return_from_mh_intrinsic(CallNode *call, Final_Reshape_Counts &frc) {
+  if (ValueTypeReturnedAsFields &&
+      call->is_CallStaticJava() &&
+      call->as_CallStaticJava()->method() != NULL &&
+      call->as_CallStaticJava()->method()->is_method_handle_intrinsic() &&
+      call->proj_out(TypeFunc::Parms) != NULL &&
+      call->proj_out(TypeFunc::Parms)->bottom_type()->isa_valuetypeptr()) {
+    // A value type is returned from the call but we don't know its
+    // type. One of the values being returned is the klass of the value
+    // type. We need to allocate a value type instance of that type
+    // and initialize it with other values being returned. This is
+    // done with the stub call below that we add right after this
+    // call.
+    Node* ret = call->proj_out(TypeFunc::Parms);
+    assert(ret->bottom_type()->is_valuetypeptr()->klass() == env()->___Value_klass(), "unexpected return type from MH intrinsic");
+    const TypeFunc* _tf = call->_tf;
+    const TypeTuple* domain = OptoRuntime::store_value_type_fields_Type()->domain_cc();
+    const TypeFunc* new_tf = TypeFunc::make(_tf->domain_sig(), _tf->domain_cc(), _tf->range_sig(), domain);
+    call->_tf = new_tf;
+
+    CallProjections projs;
+    call->extract_projections(&projs, true, true);
+    Node* ctl = projs.fallthrough_catchproj;
+    Node* mem = projs.fallthrough_memproj;
+    Node* io = projs.fallthrough_ioproj;
+    Node* ex_ctl = projs.catchall_catchproj;
+    Node* ex_mem = projs.catchall_memproj;
+    Node* ex_io = projs.catchall_ioproj;
+    CallStaticJavaNode* rt_call = new CallStaticJavaNode(OptoRuntime::store_value_type_fields_Type(),
+                                                         StubRoutines::store_value_type_fields_to_buf(),
+                                                         "store_value_type_fields",
+                                                         call->jvms()->bci(),
+                                                         TypePtr::BOTTOM);
+    Node* out_ctl = new ProjNode(rt_call, TypeFunc::Control);
+    Node* out_mem = new ProjNode(rt_call, TypeFunc::Memory);
+    Node* out_io = new ProjNode(rt_call, TypeFunc::I_O);
+    Node* res = new ProjNode(rt_call, TypeFunc::Parms);
+
+    Node* catc = new CatchNode(out_ctl, out_io, 2);
+    Node* norm = new CatchProjNode(catc, CatchProjNode::fall_through_index, CatchProjNode::no_handler_bci);
+    Node* excp = new CatchProjNode(catc, CatchProjNode::catch_all_index, CatchProjNode::no_handler_bci);
+    Node* r = new RegionNode(3);
+    Node* mem_phi = new PhiNode(r, Type::MEMORY, TypePtr::BOTTOM);
+    Node* io_phi = new PhiNode(r, Type::ABIO);
+    r->init_req(1, excp);
+    mem_phi->init_req(1, out_mem);
+    io_phi->init_req(1, out_io);
+
+    frc._visited.set(norm->_idx);
+    frc._visited.set(excp->_idx);
+
+    ctl->replace_by(norm);
+    mem->replace_by(out_mem);
+    io->replace_by(out_io);
+    ret->replace_by(res);
+    ex_ctl->replace_by(r);
+    ex_mem->replace_by(mem_phi);
+    ex_io->replace_by(io_phi);
+
+    r->init_req(2, ex_ctl);
+    mem_phi->init_req(2, ex_mem);
+    io_phi->init_req(2, ex_io);
+
+    rt_call->init_req(TypeFunc::Control, ctl);
+    rt_call->init_req(TypeFunc::Memory, mem);
+    rt_call->init_req(TypeFunc::I_O, io);
+    rt_call->init_req(TypeFunc::FramePtr, call->in(TypeFunc::FramePtr));
+    rt_call->init_req(TypeFunc::ReturnAdr, call->in(TypeFunc::ReturnAdr));
+
+    rt_call->init_req(TypeFunc::Parms, ret);
+    // We don't know how many values are returned. This assumes the
+    // worst case, that all available registers are used.
+    for (uint i = TypeFunc::Parms+1; i < domain->cnt(); i++) {
+      if (domain->field_at(i) == Type::HALF) {
+        rt_call->init_req(i, top());
+        continue;
+      }
+      Node* proj = new ProjNode(call, i);
+      rt_call->init_req(i, proj);
+    }
+
+    // We can safepoint at that new call
+    add_safepoint_edges(rt_call, call->jvms());
+  }
+}
+
 //------------------------------final_graph_reshaping_impl----------------------
 // Implement items 1-5 from final_graph_reshaping below.
 void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
@@ -2780,7 +2866,7 @@
     // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
     if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
       frc.inc_call_count();   // Count the call site
-    } else {                  // See if uncommon argument is shared
+    } else if (call->req() > TypeFunc::Parms) { // See if uncommon argument is shared
      Node *n = call->in(TypeFunc::Parms);
      int nop = n->Opcode();
      // Clone shared simple arguments to uncommon calls, item (1).
@@ -2795,6 +2881,7 @@
         call->set_req( TypeFunc::Parms, x );
       }
     }
+    value_type_return_from_mh_intrinsic(call, frc);
     break;
   }
 
@@ -4618,3 +4705,141 @@
     ni.dump();
   }
 }
+
+// Helper function for enforcing certain bytecodes to reexecute if
+// deoptimization happens
+static bool should_reexecute_implied_by_bytecode(JVMState *jvms, bool is_anewarray) {
+  ciMethod* cur_method = jvms->method();
+  int       cur_bci    = jvms->bci();
+  if (cur_method != NULL && cur_bci != InvocationEntryBci) {
+    Bytecodes::Code code = cur_method->java_code_at_bci(cur_bci);
+    return Interpreter::bytecode_should_reexecute(code) ||
+           (is_anewarray && code == Bytecodes::_multianewarray);
+    // Reexecute _multianewarray bytecode which was replaced with
+    // sequence of [a]newarray. See Parse::do_multianewarray().
+    //
+    // Note: interpreter should not have it set since this optimization
+    // is limited by dimensions and guarded by flag so in some cases
+    // multianewarray() runtime calls will be generated and
+    // the bytecode should not be reexecuted (stack will not be reset).
+  } else
+    return false;
+}
+
+void Compile::add_safepoint_edges(SafePointNode* call, JVMState* youngest_jvms, bool can_prune_locals, uint stack_slots_not_pruned) {
+  // do not scribble on the input jvms
+  JVMState* out_jvms = youngest_jvms->clone_deep(C);
+  call->set_jvms(out_jvms); // Start jvms list for call node
+
+  // For a known set of bytecodes, the interpreter should reexecute them if
+  // deoptimization happens. We set the reexecute state for them here
+  if (out_jvms->is_reexecute_undefined() && //don't change if already specified
+      should_reexecute_implied_by_bytecode(out_jvms, call->is_AllocateArray())) {
+    out_jvms->set_should_reexecute(true); //NOTE: youngest_jvms not changed
+  }
+
+  // Presize the call:
+  DEBUG_ONLY(uint non_debug_edges = call->req());
+  call->add_req_batch(top(), youngest_jvms->debug_depth());
+  assert(call->req() == non_debug_edges + youngest_jvms->debug_depth(), "");
+
+  // Set up edges so that the call looks like this:
+  //  Call [state:] ctl io mem fptr retadr
+  //       [parms:] parm0 ... parmN
+  //       [root:]  loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
+  //       [...mid:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN [...]
+  //       [young:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
+  // Note that caller debug info precedes callee debug info.
+
+  // Fill pointer walks backwards from "young:" to "root:" in the diagram above:
+  uint debug_ptr = call->req();
+
+  // Loop over the map input edges associated with jvms, add them
+  // to the call node, & reset all offsets to match call node array.
+  for (JVMState* in_jvms = youngest_jvms; in_jvms != NULL; ) {
+    uint debug_end   = debug_ptr;
+    uint debug_start = debug_ptr - in_jvms->debug_size();
+    debug_ptr = debug_start;  // back up the ptr
+
+    uint p = debug_start;  // walks forward in [debug_start, debug_end)
+    uint j, k, l;
+    SafePointNode* in_map = in_jvms->map();
+    out_jvms->set_map(call);
+
+    if (can_prune_locals) {
+      assert(in_jvms->method() == out_jvms->method(), "sanity");
+      // If the current throw can reach an exception handler in this JVMS,
+      // then we must keep everything live that can reach that handler.
+      // As a quick and dirty approximation, we look for any handlers at all.
+      if (in_jvms->method()->has_exception_handlers()) {
+        can_prune_locals = false;
+      }
+    }
+
+    // Add the Locals
+    k = in_jvms->locoff();
+    l = in_jvms->loc_size();
+    out_jvms->set_locoff(p);
+    if (!can_prune_locals) {
+      for (j = 0; j < l; j++)
+        call->set_req(p++, in_map->in(k+j));
+    } else {
+      p += l;  // already set to top above by add_req_batch
+    }
+
+    // Add the Expression Stack
+    k = in_jvms->stkoff();
+    l = in_jvms->sp();
+    out_jvms->set_stkoff(p);
+    if (!can_prune_locals) {
+      for (j = 0; j < l; j++)
+        call->set_req(p++, in_map->in(k+j));
+    } else if (can_prune_locals && stack_slots_not_pruned != 0) {
+      // Divide stack into {S0,...,S1}, where S0 is set to top.
+      uint s1 = stack_slots_not_pruned;
+      stack_slots_not_pruned = 0;  // for next iteration
+      if (s1 > l)  s1 = l;
+      uint s0 = l - s1;
+      p += s0;  // skip the tops preinstalled by add_req_batch
+      for (j = s0; j < l; j++)
+        call->set_req(p++, in_map->in(k+j));
+    } else {
+      p += l;  // already set to top above by add_req_batch
+    }
+
+    // Add the Monitors
+    k = in_jvms->monoff();
+    l = in_jvms->mon_size();
+    out_jvms->set_monoff(p);
+    for (j = 0; j < l; j++)
+      call->set_req(p++, in_map->in(k+j));
+
+    // Copy any scalar object fields.
+    k = in_jvms->scloff();
+    l = in_jvms->scl_size();
+    out_jvms->set_scloff(p);
+    for (j = 0; j < l; j++)
+      call->set_req(p++, in_map->in(k+j));
+
+    // Finish the new jvms.
+    out_jvms->set_endoff(p);
+
+    assert(out_jvms->endoff()     == debug_end,             "fill ptr must match");
+    assert(out_jvms->depth()      == in_jvms->depth(),      "depth must match");
+    assert(out_jvms->loc_size()   == in_jvms->loc_size(),   "size must match");
+    assert(out_jvms->mon_size()   == in_jvms->mon_size(),   "size must match");
+    assert(out_jvms->scl_size()   == in_jvms->scl_size(),   "size must match");
+    assert(out_jvms->debug_size() == in_jvms->debug_size(), "size must match");
+
+    // Update the two tail pointers in parallel.
+    out_jvms = out_jvms->caller();
+    in_jvms  = in_jvms->caller();
+  }
+
+  assert(debug_ptr == non_debug_edges, "debug info must fit exactly");
+
+  // Test the correctness of JVMState::debug_xxx accessors:
+  assert(call->jvms()->debug_start() == non_debug_edges, "");
+  assert(call->jvms()->debug_end()   == call->req(), "");
+  assert(call->jvms()->debug_depth() == call->req() - non_debug_edges, "");
+}
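
The Compile::add_safepoint_edges code in the last hunk is easier to follow with the debug-info slot layout in mind. The following standalone sketch is not part of the patch and uses no HotSpot types; DebugFrame, flatten() and the slot names are invented for illustration only. It models just the index arithmetic the comments describe: the call is pre-sized with top() edges, the fill pointer starts at the end and walks backwards one frame at a time, and each frame is filled forward as [locals | expression stack | monitors | scalarized fields], so caller debug info ends up before callee debug info.

// Standalone sketch (not HotSpot code): models the debug-edge layout built by
// add_safepoint_edges. DebugFrame and flatten() are illustrative inventions.
#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

struct DebugFrame {
  std::vector<std::string> locals;
  std::vector<std::string> stack;
  std::vector<std::string> monitors;
  std::vector<std::string> scalars;

  std::size_t debug_size() const {
    return locals.size() + stack.size() + monitors.size() + scalars.size();
  }
};

// frames[0] is the youngest (innermost) frame, like youngest_jvms in the patch.
// Returns the flattened edge list with caller frames before callee frames.
static std::vector<std::string> flatten(const std::vector<DebugFrame>& frames,
                                        std::size_t non_debug_edges) {
  std::size_t total = 0;
  for (const DebugFrame& f : frames) total += f.debug_size();

  // add_req_batch(top(), debug_depth()): presize everything with "top".
  std::vector<std::string> edges(non_debug_edges + total, "top");
  std::size_t debug_ptr = edges.size();            // fill pointer starts at the end

  for (const DebugFrame& f : frames) {             // walk youngest to oldest
    std::size_t debug_start = debug_ptr - f.debug_size();
    std::size_t p = debug_start;                   // walks forward within this frame
    for (const std::string& v : f.locals)   edges[p++] = v;  // locoff region
    for (const std::string& v : f.stack)    edges[p++] = v;  // stkoff region
    for (const std::string& v : f.monitors) edges[p++] = v;  // monoff region
    for (const std::string& v : f.scalars)  edges[p++] = v;  // scloff region
    debug_ptr = debug_start;                       // back up to the caller's slots
  }
  // Here debug_ptr == non_debug_edges, mirroring the final assert in the patch.
  return edges;
}

int main() {
  DebugFrame young = { {"y_loc0"}, {"y_stk0", "y_stk1"}, {}, {} };
  DebugFrame root  = { {"r_loc0", "r_loc1"}, {}, {"r_mon0"}, {} };
  std::vector<std::string> edges = flatten({young, root}, /*non_debug_edges=*/5);
  for (std::size_t i = 0; i < edges.size(); i++) {
    std::printf("%zu: %s\n", i, edges[i].c_str());
  }
  return 0;
}

Pruning (can_prune_locals / stack_slots_not_pruned) is omitted from the sketch; in the patch it simply leaves the preinstalled top() edges in place for the slots that are dropped.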