--- old/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2016-12-13 09:36:25.138562899 +0100 +++ new/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2016-12-13 09:36:25.078563052 +0100 @@ -545,13 +545,122 @@ __ bind(L); } +// For each value type argument, sig includes the list of fields of +// the value type. This utility function computes the number of +// arguments for the call if value types are passed by reference (the +// calling convention the interpreter expects). +static int compute_total_args_passed_int(const GrowableArray<SigEntry>& sig_extended) { + int total_args_passed = 0; + if (ValueTypePassFieldsAsArgs) { + for (int i = 0; i < sig_extended.length(); i++) { + BasicType bt = sig_extended.at(i)._bt; + if (bt == T_VALUETYPE) { + // In sig_extended, a value type argument starts with: + // T_VALUETYPE, followed by the types of the fields of the + // value type and T_VOID to mark the end of the value + // type. Value types are flattened so, for instance, in the + // case of a value type with an int field and a value type + // field that itself has 2 fields, an int and a long: + // T_VALUETYPE T_INT T_VALUETYPE T_INT T_LONG T_VOID (second + // slot for the T_LONG) T_VOID (inner T_VALUETYPE) T_VOID + // (outer T_VALUETYPE) + total_args_passed++; + int vt = 1; + do { + i++; + BasicType bt = sig_extended.at(i)._bt; + BasicType prev_bt = sig_extended.at(i-1)._bt; + if (bt == T_VALUETYPE) { + vt++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + } + } while (vt != 0); + } else { + total_args_passed++; + } + } + } else { + total_args_passed = sig_extended.length(); + } + return total_args_passed; +} + + +static void gen_c2i_adapter_helper(MacroAssembler *masm, + BasicType bt, + BasicType prev_bt, + const VMRegPair& reg_pair, + const Address& to, + int extraspace) { + assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here"); + if (bt == T_VOID) { + assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); + return; + } + + 
// Say 4 args: + // i st_off + // 0 32 T_LONG + // 1 24 T_VOID + // 2 16 T_OBJECT + // 3 8 T_BOOL + // - 0 return address + // + // However to make thing extra confusing. Because we can fit a long/double in + // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter + // leaves one slot empty and only stores to a single slot. In this case the + // slot that is occupied is the T_VOID slot. See I said it was confusing. + + VMReg r_1 = reg_pair.first(); + VMReg r_2 = reg_pair.second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + return; + } + if (r_1->is_stack()) { + // memory to memory use rax + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + if (!r_2->is_valid()) { + // sign extend?? + __ movl(rax, Address(rsp, ld_off)); + __ movl(to, rax); + } else { + + __ movq(rax, Address(rsp, ld_off)); + __ movq(to, rax); + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + // must be only an int (or less ) so move only 32bits to slot + // why not sign extend?? + __ movl(to, r); + } else { + __ movq(to, r); + } + } else { + assert(r_1->is_XMMRegister(), ""); + if (!r_2->is_valid()) { + // only a float use just part of the slot + __ movflt(to, r_1->as_XMMRegister()); + } else { + __ movdbl(to, r_1->as_XMMRegister()); + } + } +} + static void gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray<SigEntry>& sig_extended, const VMRegPair *regs, - Label& skip_fixup) { + Label& skip_fixup, + address start, + OopMapSet*& oop_maps, + int& frame_complete, + int& frame_size_in_words) { // Before we get into the guts of the C2I adapter, see if we should be here // at all. 
We've come from compiled code and are attempting to jump to the // interpreter, which means the caller made a static call to get here @@ -561,11 +670,57 @@ __ bind(skip_fixup); + if (ValueTypePassFieldsAsArgs) { + // Is there a value type arguments? + int i = 0; + for (; i < sig_extended.length() && sig_extended.at(i)._bt != T_VALUETYPE; i++); + + if (i < sig_extended.length()) { + // There is at least a value type argument: we're coming from + // compiled code so we have no buffers to back the value + // types. Allocate the buffers here with a runtime call. + oop_maps = new OopMapSet(); + OopMap* map = NULL; + + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + frame_complete = __ offset(); + + __ set_last_Java_frame(noreg, noreg, NULL); + + __ mov(c_rarg0, r15_thread); + + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_value_types))); + + oop_maps->add_gc_map((int)(__ pc() - start), map); + __ reset_last_Java_frame(false, false); + + RegisterSaver::restore_live_registers(masm); + + Label no_exception; + __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); + __ jcc(Assembler::equal, no_exception); + + __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD); + __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + __ bind(no_exception); + + // We get an array of objects from the runtime call + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(T_OBJECT); + __ get_vm_result(r13, r15_thread); + __ addptr(r13, offset_in_bytes); + __ mov(r10, r13); + } + } + + // Since all args are passed on the stack, total_args_passed * // Interpreter::stackElementSize is the space we need. 
Plus 1 because // we also account for the return address location since // we store it first rather than hold it in rax across all the shuffling - + int total_args_passed = compute_total_args_passed_int(sig_extended); int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize; // stack is aligned, keep it that way @@ -583,96 +738,69 @@ __ movptr(Address(rsp, 0), rax); // Now write the args into the outgoing interpreter space - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - // offset to start parameters - int st_off = (total_args_passed - i) * Interpreter::stackElementSize; - int next_off = st_off - Interpreter::stackElementSize; - - // Say 4 args: - // i st_off - // 0 32 T_LONG - // 1 24 T_VOID - // 2 16 T_OBJECT - // 3 8 T_BOOL - // - 0 return address - // - // However to make thing extra confusing. Because we can fit a long/double in - // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter - // leaves one slot empty and only stores to a single slot. In this case the - // slot that is occupied is the T_VOID slot. See I said it was confusing. - - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - if (r_1->is_stack()) { - // memory to memory use rax - int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; - if (!r_2->is_valid()) { - // sign extend?? 
- __ movl(rax, Address(rsp, ld_off)); - __ movptr(Address(rsp, st_off), rax); - } else { - - __ movq(rax, Address(rsp, ld_off)); - - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // ld_off == LSW, ld_off+wordSize == MSW - // st_off == MSW, next_off == LSW - __ movq(Address(rsp, next_off), rax); + // next_arg_comp is the next argument from the compiler point of + // view (value type fields are passed in registers/on the stack). In + // sig_extended, a value type argument starts with: T_VALUETYPE, + // followed by the types of the fields of the value type and T_VOID + // to mark the end of the value type. ignored counts the number of + // T_VALUETYPE/T_VOID. next_vt_arg is the next value type argument: + // used to get the buffer for that argument from the pool of buffers + // we allocated above and want to pass to the + // interpreter. next_arg_int is the next argument from the + // interpreter point of view (value types are passed by reference). + for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0; + next_arg_comp < sig_extended.length(); next_arg_comp++) { + assert(ignored <= next_arg_comp, "shouldn't skip over more slot than there are arguments"); + assert(next_arg_int < total_args_passed, "more arguments for the interpreter than expected?"); + BasicType bt = sig_extended.at(next_arg_comp)._bt; + int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize; + if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) { + int next_off = st_off - Interpreter::stackElementSize; + const int offset = (bt==T_LONG||bt==T_DOUBLE) ? next_off : st_off; + gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? 
sig_extended.at(next_arg_comp-1)._bt : T_ILLEGAL, + regs[next_arg_comp-ignored], Address(rsp, offset), extraspace); + next_arg_int++; #ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); - __ movptr(Address(rsp, st_off), rax); -#endif /* ASSERT */ - } else { - __ movq(Address(rsp, st_off), rax); - } + if (bt==T_LONG || bt==T_DOUBLE) { + // Overwrite the unused slot with known junk + __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); + __ movptr(Address(rsp, st_off), rax); } - } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - if (!r_2->is_valid()) { - // must be only an int (or less ) so move only 32bits to slot - // why not sign extend?? - __ movl(Address(rsp, st_off), r); - } else { - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // long/double in gpr -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov64(rax, CONST64(0xdeadffffdeadaaab)); - __ movptr(Address(rsp, st_off), rax); #endif /* ASSERT */ - __ movq(Address(rsp, next_off), r); + } else { + ignored++; + // get the buffer from the just allocated pool of buffers + __ load_heap_oop(r11, Address(r10, next_vt_arg * type2aelembytes(T_VALUETYPE))); + next_vt_arg++; next_arg_int++; + int vt = 1; + // write fields we get from compiled code in registers/stack + // slots to the buffer: we know we are done with that value type + // argument when we hit the T_VOID that acts as an end of value + // type delimiter for this value type. Value types are flattened + // so we might encounter a embedded value types. Each entry in + // sig_extended contains a field offset in the buffer. 
+ do { + next_arg_comp++; + BasicType bt = sig_extended.at(next_arg_comp)._bt; + BasicType prev_bt = sig_extended.at(next_arg_comp-1)._bt; + if (bt == T_VALUETYPE) { + vt++; + ignored++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + ignored++; } else { - __ movptr(Address(rsp, st_off), r); + int off = sig_extended.at(next_arg_comp)._offset; + assert(off > 0, "offset in object should be positive"); + gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended.at(next_arg_comp-1)._bt : T_ILLEGAL, + regs[next_arg_comp-ignored], Address(r11, off), extraspace); } - } - } else { - assert(r_1->is_XMMRegister(), ""); - if (!r_2->is_valid()) { - // only a float use just part of the slot - __ movflt(Address(rsp, st_off), r_1->as_XMMRegister()); - } else { -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov64(rax, CONST64(0xdeadffffdeadaaac)); - __ movptr(Address(rsp, st_off), rax); -#endif /* ASSERT */ - __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister()); - } + } while (vt != 0); + // pass the buffer to the interpreter + __ movptr(Address(rsp, st_off), r11); } } @@ -694,10 +822,83 @@ __ bind(L_fail); } +static void gen_i2c_adapter_helper(MacroAssembler *masm, + BasicType bt, + BasicType prev_bt, + const VMRegPair& reg_pair, + const Address& from) { + assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here"); + if (bt == T_VOID) { + // Longs and doubles are passed in native word order, but misaligned + // in the 32-bit build. + assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); + return; + } + // Pick up 0, 1 or 2 words from SP+offset. 
+ + assert(!reg_pair.second()->is_valid() || reg_pair.first()->next() == reg_pair.second(), + "scrambled load targets?"); + // + // + // + VMReg r_1 = reg_pair.first(); + VMReg r_2 = reg_pair.second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + return; + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to account for return address ) + int st_off = reg_pair.first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; + + // We can use r13 as a temp here because compiled code doesn't need r13 as an input + // and if we end up going thru a c2i because of a miss a reasonable value of r13 + // will be generated. + if (!r_2->is_valid()) { + // sign extend??? + __ movl(r13, from); + __ movptr(Address(rsp, st_off), r13); + } else { + // + // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE + // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the interpreter. + // + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + __ movq(r13, from); + // st_off is LSW (i.e. reg.first()) + __ movq(Address(rsp, st_off), r13); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + assert(r != rax, "must be different"); + if (r_2->is_valid()) { + // + // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE + // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the interpreter. + + // this can be a misaligned move + __ movq(r, from); + } else { + // sign extend and use a full word? 
+ __ movl(r, from); + } + } else { + if (!r_2->is_valid()) { + __ movflt(r_1->as_XMMRegister(), from); + } else { + __ movdbl(r_1->as_XMMRegister(), from); + } + } +} + void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray<SigEntry>& sig_extended, const VMRegPair *regs) { // Note: r13 contains the senderSP on entry. We must preserve it since @@ -803,84 +1004,60 @@ } #endif // INCLUDE_JVMCI + int total_args_passed = compute_total_args_passed_int(sig_extended); // Now generate the shuffle code. Pick up all register args and move the // rest through the floating point stack top. - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - // Longs and doubles are passed in native word order, but misaligned - // in the 32-bit build. - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - // Pick up 0, 1 or 2 words from SP+offset. - - assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), - "scrambled load targets?"); - // Load in argument order going down. - int ld_off = (total_args_passed - i)*Interpreter::stackElementSize; - // Point to interpreter value (vs. tag) - int next_off = ld_off - Interpreter::stackElementSize; - // - // - // - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - if (r_1->is_stack()) { - // Convert stack slot to an SP offset (+ wordSize to account for return address ) - int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; - - // We can use r13 as a temp here because compiled code doesn't need r13 as an input - // and if we end up going thru a c2i because of a miss a reasonable value of r13 - // will be generated. - if (!r_2->is_valid()) { - // sign extend??? 
- __ movl(r13, Address(saved_sp, ld_off)); - __ movptr(Address(rsp, st_off), r13); - } else { - // - // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE - // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case - // So we must adjust where to pick up the data to match the interpreter. - // - // Interpreter local[n] == MSW, local[n+1] == LSW however locals - // are accessed as negative so LSW is at LOW address - - // ld_off is MSW so get LSW - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; - __ movq(r13, Address(saved_sp, offset)); - // st_off is LSW (i.e. reg.first()) - __ movq(Address(rsp, st_off), r13); - } - } else if (r_1->is_Register()) { // Register argument - Register r = r_1->as_Register(); - assert(r != rax, "must be different"); - if (r_2->is_valid()) { - // - // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE - // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case - // So we must adjust where to pick up the data to match the interpreter. - - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; - // this can be a misaligned move - __ movq(r, Address(saved_sp, offset)); - } else { - // sign extend and use a full word? - __ movl(r, Address(saved_sp, ld_off)); - } + // next_arg_comp is the next argument from the compiler point of + // view (value type fields are passed in registers/on the stack). In + // sig_extended, a value type argument starts with: T_VALUETYPE, + // followed by the types of the fields of the value type and T_VOID + // to mark the end of the value type. ignored counts the number of + // T_VALUETYPE/T_VOID. next_arg_int is the next argument from the + // interpreter point of view (value types are passed by reference). 
+ for (int next_arg_comp = 0, ignored = 0, next_arg_int = 0; next_arg_comp < sig_extended.length(); next_arg_comp++) { + assert(ignored <= next_arg_comp, "shouldn't skip over more slot than there are arguments"); + assert(next_arg_int < total_args_passed, "more arguments from the interpreter than expected?"); + BasicType bt = sig_extended.at(next_arg_comp)._bt; + int ld_off = (total_args_passed - next_arg_int)*Interpreter::stackElementSize; + if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) { + // Load in argument order going down. + // Point to interpreter value (vs. tag) + int next_off = ld_off - Interpreter::stackElementSize; + const int offset = (bt==T_LONG||bt==T_DOUBLE) ? next_off : ld_off; + gen_i2c_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended.at(next_arg_comp-1)._bt : T_ILLEGAL, + regs[next_arg_comp-ignored], Address(saved_sp, offset)); + next_arg_int++; } else { - if (!r_2->is_valid()) { - __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off)); - } else { - __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off)); - } + next_arg_int++; + ignored++; + // get the buffer for that value type + __ movptr(r10, Address(saved_sp, ld_off)); + int vt = 1; + // load fields to registers/stack slots from the buffer: we know + // we are done with that value type argument when we hit the + // T_VOID that acts as an end of value type delimiter for this + // value type. Value types are flattened so we might encounter a + // embedded value types. Each entry in sig_extended contains a + // field offset in the buffer. 
+ do { + next_arg_comp++; + BasicType bt = sig_extended.at(next_arg_comp)._bt; + BasicType prev_bt = sig_extended.at(next_arg_comp-1)._bt; + if (bt == T_VALUETYPE) { + vt++; + ignored++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + ignored++; + } else { + int off = sig_extended.at(next_arg_comp)._offset; + assert(off > 0, "offset in object should be positive"); + gen_i2c_adapter_helper(masm, bt, prev_bt, regs[next_arg_comp - ignored], Address(r10, off)); + } + } while (vt != 0); } } @@ -897,7 +1074,7 @@ __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // put Method* where a c2i would expect should we end up there - // only needed becaus eof c2 resolve stubs return Method* as a result in + // only needed because of c2 resolve stubs return Method* as a result in // rax __ mov(rax, rbx); __ jmp(r11); @@ -905,14 +1082,14 @@ // --------------------------------------------------------------- AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray<SigEntry>& sig_extended, const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { + AdapterFingerPrint* fingerprint, + AdapterBlob*& new_adapter) { address i2c_entry = __ pc(); - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + gen_i2c_adapter(masm, comp_args_on_stack, sig_extended, regs); // ------------------------------------------------------------------------- // Generate a C2I adapter. 
On entry we know rbx holds the Method* during calls @@ -949,9 +1126,13 @@ address c2i_entry = __ pc(); - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + OopMapSet* oop_maps = NULL; + int frame_complete = CodeOffsets::frame_never_safe; + int frame_size_in_words = 0; + gen_c2i_adapter(masm, sig_extended, regs, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words); __ flush(); + new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps); return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); } --- old/src/share/vm/ci/ciValueKlass.cpp 2016-12-13 09:36:25.503561973 +0100 +++ new/src/share/vm/ci/ciValueKlass.cpp 2016-12-13 09:36:25.441562130 +0100 @@ -132,3 +132,19 @@ return vklass_h()->first_field_offset(); ) } + +// When passing field's fields as arguments, count the number of extra +// argument slots that are needed +int ciValueKlass::extra_value_args() { + // -1 because we count the number of extra args + int vt_extra = nof_nonstatic_fields() - 1; + for (int j = 0; j < nof_nonstatic_fields(); j++) { + ciField* f = nonstatic_field_at(j); + BasicType bt = f->type()->basic_type(); + assert(bt != T_VALUETYPE, "embedded"); + if (bt == T_LONG || bt == T_DOUBLE) { + vt_extra++; + } + } + return vt_extra; +} --- old/src/share/vm/ci/ciValueKlass.hpp 2016-12-13 09:36:25.814561183 +0100 +++ new/src/share/vm/ci/ciValueKlass.hpp 2016-12-13 09:36:25.754561335 +0100 @@ -66,6 +66,7 @@ ciType* field_type_by_index(int index); int first_field_offset() const; + int extra_value_args(); }; #endif // SHARE_VM_CI_CIVALUEKLASS_HPP --- old/src/share/vm/code/codeBlob.cpp 2016-12-13 09:36:26.128560386 +0100 +++ new/src/share/vm/code/codeBlob.cpp 2016-12-13 09:36:26.068560538 +0100 @@ -251,23 +251,27 @@ MemoryService::track_code_cache_memory_usage(); } +BufferBlob::BufferBlob(const char* name, int size, CodeBuffer* cb, int frame_complete, int frame_size, 
OopMapSet* oop_maps) + : CodeBlob(name, cb, sizeof(BufferBlob), size, frame_complete, frame_size, oop_maps) +{} + //---------------------------------------------------------------------------------------------------- // Implementation of AdapterBlob -AdapterBlob::AdapterBlob(int size, CodeBuffer* cb) : - BufferBlob("I2C/C2I adapters", size, cb) { +AdapterBlob::AdapterBlob(int size, CodeBuffer* cb, int frame_complete, int frame_size, OopMapSet* oop_maps) : + BufferBlob("I2C/C2I adapters", size, cb, frame_complete, frame_size, oop_maps) { CodeCache::commit(this); } -AdapterBlob* AdapterBlob::create(CodeBuffer* cb) { +AdapterBlob* AdapterBlob::create(CodeBuffer* cb, int frame_complete, int frame_size, OopMapSet* oop_maps) { ThreadInVMfromUnknown __tiv; // get to VM state in case we block on CodeCache_lock AdapterBlob* blob = NULL; unsigned int size = allocation_size(cb, sizeof(AdapterBlob)); { MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); - blob = new (size) AdapterBlob(size, cb); + blob = new (size) AdapterBlob(size, cb, frame_complete, frame_size, oop_maps); } // Track memory usage statistic after releasing CodeCache_lock MemoryService::track_code_cache_memory_usage(); --- old/src/share/vm/code/codeBlob.hpp 2016-12-13 09:36:26.471559515 +0100 +++ new/src/share/vm/code/codeBlob.hpp 2016-12-13 09:36:26.409559672 +0100 @@ -235,6 +235,7 @@ // Creation support BufferBlob(const char* name, int size); BufferBlob(const char* name, int size, CodeBuffer* cb); + BufferBlob(const char* name, int size, CodeBuffer* cb, int frame_complete, int frame_size, OopMapSet* oop_maps); void* operator new(size_t s, unsigned size) throw(); @@ -263,14 +264,19 @@ class AdapterBlob: public BufferBlob { private: - AdapterBlob(int size, CodeBuffer* cb); + AdapterBlob(int size, CodeBuffer* cb, int frame_complete, int frame_size, OopMapSet* oop_maps); public: // Creation - static AdapterBlob* create(CodeBuffer* cb); + static AdapterBlob* create(CodeBuffer* cb, + int 
frame_complete, + int frame_size, + OopMapSet* oop_maps); // Typing virtual bool is_adapter_blob() const { return true; } + + bool caller_must_gc_arguments(JavaThread* thread) const { return true; } }; --- old/src/share/vm/interpreter/linkResolver.cpp 2016-12-13 09:36:26.792558700 +0100 +++ new/src/share/vm/interpreter/linkResolver.cpp 2016-12-13 09:36:26.728558862 +0100 @@ -1491,7 +1491,7 @@ case Bytecodes::_invokestatic : resolve_invokestatic (result, pool, index, CHECK); break; case Bytecodes::_invokespecial : resolve_invokespecial (result, pool, index, CHECK); break; case Bytecodes::_invokevirtual : resolve_invokevirtual (result, recv, pool, index, CHECK); break; - case Bytecodes::_invokedirect : resolve_invokevirtual (result, recv, pool, index, CHECK); break; // temp hack + case Bytecodes::_invokedirect : resolve_invokespecial (result, pool, index, CHECK); break; // temp hack case Bytecodes::_invokehandle : resolve_invokehandle (result, pool, index, CHECK); break; case Bytecodes::_invokedynamic : resolve_invokedynamic (result, pool, index, CHECK); break; case Bytecodes::_invokeinterface: resolve_invokeinterface(result, recv, pool, index, CHECK); break; --- old/src/share/vm/opto/buildOopMap.cpp 2016-12-13 09:36:27.190557689 +0100 +++ new/src/share/vm/opto/buildOopMap.cpp 2016-12-13 09:36:27.126557852 +0100 @@ -260,7 +260,7 @@ // Outgoing argument GC mask responsibility belongs to the callee, // not the caller. Inspect the inputs to the call, to see if // this live-range is one of them. - uint cnt = mcall->tf()->domain()->cnt(); + uint cnt = mcall->tf()->domain_cc()->cnt(); uint j; for( j = TypeFunc::Parms; j < cnt; j++) if( mcall->in(j) == def ) @@ -330,7 +330,7 @@ // Outgoing argument GC mask responsibility belongs to the callee, // not the caller. Inspect the inputs to the call, to see if // this live-range is one of them. 
- uint cnt = mcall->tf()->domain()->cnt(); + uint cnt = mcall->tf()->domain_cc()->cnt(); uint j; for( j = TypeFunc::Parms; j < cnt; j++) if( mcall->in(j) == def ) --- old/src/share/vm/opto/callGenerator.cpp 2016-12-13 09:36:27.522556846 +0100 +++ new/src/share/vm/opto/callGenerator.cpp 2016-12-13 09:36:27.458557009 +0100 @@ -154,8 +154,10 @@ _call_node = call; // Save the call node in case we need it later if (!is_static) { if (kit.argument(0)->is_ValueType()) { - ValueTypeNode* vt = kit.argument(0)->as_ValueType(); - vt->store_to_memory(&kit); + if (!ValueTypePassFieldsAsArgs) { + ValueTypeNode* vt = kit.argument(0)->as_ValueType(); + vt->store_to_memory(&kit); + } } else { // Make an explicit receiver null_check as part of this call. // Since we share a map with the caller, his JVMS gets adjusted. @@ -369,8 +371,12 @@ call->in(0) == NULL || call->in(0)->is_top()) { return; } - - const TypeTuple *r = call->tf()->domain(); + + // FIXME: late inlining of methods that take value type arguments is + // broken: arguments at the call are set up so fields of value type + // arguments are passed but code here expects a single argument per + // value type (a ValueTypeNode) instead. 
+ const TypeTuple *r = call->tf()->domain_sig(); for (int i1 = 0; i1 < method()->arg_size(); i1++) { if (call->in(TypeFunc::Parms + i1)->is_top() && r->field_at(TypeFunc::Parms + i1) != Type::HALF) { assert(Compile::current()->inlining_incrementally(), "shouldn't happen during parsing"); --- old/src/share/vm/opto/callnode.cpp 2016-12-13 09:36:27.850556013 +0100 +++ new/src/share/vm/opto/callnode.cpp 2016-12-13 09:36:27.787556173 +0100 @@ -743,7 +743,7 @@ bool CallNode::may_modify(const TypeOopPtr *t_oop, PhaseTransform *phase) { assert((t_oop != NULL), "sanity"); if (is_call_to_arraycopystub() && strcmp(_name, "unsafe_arraycopy") != 0) { - const TypeTuple* args = _tf->domain(); + const TypeTuple* args = _tf->domain_sig(); Node* dest = NULL; // Stubs that can be called once an ArrayCopyNode is expanded have // different signatures. Look for the second pointer argument, @@ -791,7 +791,7 @@ return true; } } - const TypeTuple* d = tf()->domain(); + const TypeTuple* d = tf()->domain_cc(); for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { const TypeInstPtr* inst_t = d->field_at(i)->isa_instptr(); if ((inst_t != NULL) && (!inst_t->klass_is_exact() || @@ -807,7 +807,7 @@ // Does this call have a direct reference to n other than debug information? 
bool CallNode::has_non_debug_use(Node *n) { - const TypeTuple * d = tf()->domain(); + const TypeTuple * d = tf()->domain_cc(); for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { Node *arg = in(i); if (arg == n) { --- old/src/share/vm/opto/callnode.hpp 2016-12-13 09:36:28.179555178 +0100 +++ new/src/share/vm/opto/callnode.hpp 2016-12-13 09:36:28.116555338 +0100 @@ -572,7 +572,7 @@ const char *_name; // Printable name, if _method is NULL CallNode(const TypeFunc* tf, address addr, const TypePtr* adr_type) - : SafePointNode(tf->domain()->cnt(), NULL, adr_type), + : SafePointNode(tf->domain_cc()->cnt(), NULL, adr_type), _tf(tf), _entry_point(addr), _cnt(COUNT_UNKNOWN), @@ -1069,7 +1069,7 @@ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields); - return TypeFunc::make(domain,range); + return TypeFunc::make(domain, range); } virtual int Opcode() const; --- old/src/share/vm/opto/chaitin.cpp 2016-12-13 09:36:28.514554327 +0100 +++ new/src/share/vm/opto/chaitin.cpp 2016-12-13 09:36:28.450554490 +0100 @@ -2190,7 +2190,7 @@ void PhaseChaitin::dump_frame() const { const char *fp = OptoReg::regname(OptoReg::c_frame_pointer); - const TypeTuple *domain = C->tf()->domain(); + const TypeTuple *domain = C->tf()->domain_cc(); const int argcnt = domain->cnt() - TypeFunc::Parms; // Incoming arguments in registers dump --- old/src/share/vm/opto/compile.cpp 2016-12-13 09:36:28.848553479 +0100 +++ new/src/share/vm/opto/compile.cpp 2016-12-13 09:36:28.785553639 +0100 @@ -754,7 +754,7 @@ } else { // Normal case. 
init_tf(TypeFunc::make(method())); - StartNode* s = new StartNode(root(), tf()->domain()); + StartNode* s = new StartNode(root(), tf()->domain_cc()); initial_gvn()->set_type_bottom(s); init_start(s); if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && UseG1GC) { --- old/src/share/vm/opto/doCall.cpp 2016-12-13 09:36:29.212552555 +0100 +++ new/src/share/vm/opto/doCall.cpp 2016-12-13 09:36:29.145552725 +0100 @@ -562,7 +562,7 @@ // Speculative type of the receiver if any ciKlass* speculative_receiver_type = NULL; - if (is_virtual_or_interface || is_direct) { + if (is_virtual_or_interface) { Node* receiver_node = stack(sp() - nargs); const TypeOopPtr* receiver_type = _gvn.type(receiver_node)->isa_oopptr(); // call_does_dispatch and vtable_index are out-parameters. They might be changed. @@ -1023,9 +1023,9 @@ } } else if (is_inline) { switch (bc()) { - case Bytecodes::_invokedirect: case Bytecodes::_invokevirtual: increment_counter(SharedRuntime::nof_inlined_calls_addr()); break; case Bytecodes::_invokeinterface: increment_counter(SharedRuntime::nof_inlined_interface_calls_addr()); break; + case Bytecodes::_invokedirect: case Bytecodes::_invokestatic: case Bytecodes::_invokedynamic: case Bytecodes::_invokespecial: increment_counter(SharedRuntime::nof_inlined_static_calls_addr()); break; @@ -1033,9 +1033,9 @@ } } else { switch (bc()) { - case Bytecodes::_invokedirect: case Bytecodes::_invokevirtual: increment_counter(SharedRuntime::nof_normal_calls_addr()); break; case Bytecodes::_invokeinterface: increment_counter(SharedRuntime::nof_interface_calls_addr()); break; + case Bytecodes::_invokedirect: case Bytecodes::_invokestatic: case Bytecodes::_invokedynamic: case Bytecodes::_invokespecial: increment_counter(SharedRuntime::nof_static_calls_addr()); break; --- old/src/share/vm/opto/escape.cpp 2016-12-13 09:36:29.544551712 +0100 +++ new/src/share/vm/opto/escape.cpp 2016-12-13 09:36:29.482551870 +0100 @@ -882,7 +882,7 @@ 
ptnode_adr(call_idx)->set_scalar_replaceable(false); } else { // Determine whether any arguments are returned. - const TypeTuple* d = call->tf()->domain(); + const TypeTuple* d = call->tf()->domain_sig(); bool ret_arg = false; for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { if (d->field_at(i)->isa_ptr() != NULL && @@ -929,7 +929,7 @@ case Op_CallLeaf: { // Stub calls, objects do not escape but they are not scale replaceable. // Adjust escape state for outgoing arguments. - const TypeTuple * d = call->tf()->domain(); + const TypeTuple * d = call->tf()->domain_sig(); bool src_has_oops = false; for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { const Type* at = d->field_at(i); @@ -1057,11 +1057,16 @@ // fall-through if not a Java method or no analyzer information if (call_analyzer != NULL) { PointsToNode* call_ptn = ptnode_adr(call->_idx); - const TypeTuple* d = call->tf()->domain(); + const TypeTuple* d = call->tf()->domain_sig(); + int extra = 0; for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { const Type* at = d->field_at(i); + if (at->isa_valuetypeptr()) { + extra += at->is_valuetypeptr()->value_type()->value_klass()->field_count() - 1; + continue; + } int k = i - TypeFunc::Parms; - Node* arg = call->in(i); + Node* arg = call->in(i + extra); PointsToNode* arg_ptn = ptnode_adr(arg->_idx); if (at->isa_ptr() != NULL && call_analyzer->is_arg_returned(k)) { @@ -1101,7 +1106,7 @@ // Fall-through here if not a Java method or no analyzer information // or some other type of call, assume the worst case: all arguments // globally escape. 
- const TypeTuple* d = call->tf()->domain(); + const TypeTuple* d = call->tf()->domain_sig(); for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { const Type* at = d->field_at(i); if (at->isa_oopptr() != NULL) { --- old/src/share/vm/opto/generateOptoStub.cpp 2016-12-13 09:36:29.887550841 +0100 +++ new/src/share/vm/opto/generateOptoStub.cpp 2016-12-13 09:36:29.824551001 +0100 @@ -46,7 +46,7 @@ bool return_pc) { ResourceMark rm; - const TypeTuple *jdomain = C->tf()->domain(); + const TypeTuple *jdomain = C->tf()->domain_sig(); const TypeTuple *jrange = C->tf()->range(); // The procedure start --- old/src/share/vm/opto/graphKit.cpp 2016-12-13 09:36:30.220549996 +0100 +++ new/src/share/vm/opto/graphKit.cpp 2016-12-13 09:36:30.145550186 +0100 @@ -1717,13 +1717,25 @@ void GraphKit::set_arguments_for_java_call(CallJavaNode* call) { // Add the call arguments: uint nargs = call->method()->arg_size(); - for (uint i = 0; i < nargs; i++) { + for (uint i = 0, idx = 0; i < nargs; i++) { Node* arg = argument(i); - if (arg->is_ValueType()) { - // Pass value type argument via oop to callee - arg = arg->as_ValueType()->store_to_memory(this); + if (ValueTypePassFieldsAsArgs) { + if (arg->is_ValueType()) { + ValueTypeNode* vt = arg->as_ValueType(); + // We don't pass value type arguments by reference but instead + // pass each field of the value type + idx += vt->set_arguments_for_java_call(call, idx + TypeFunc::Parms, *this); + } else { + call->init_req(idx + TypeFunc::Parms, arg); + idx++; + } + } else { + if (arg->is_ValueType()) { + // Pass value type argument via oop to callee + arg = arg->as_ValueType()->store_to_memory(this); + } + call->init_req(i + TypeFunc::Parms, arg); } - call->init_req(i + TypeFunc::Parms, arg); } } @@ -2127,9 +2139,9 @@ void GraphKit::round_double_arguments(ciMethod* dest_method) { // (Note: TypeFunc::make has a cache that makes this fast.) 
const TypeFunc* tf = TypeFunc::make(dest_method); - int nargs = tf->domain()->cnt() - TypeFunc::Parms; + int nargs = tf->domain_sig()->cnt() - TypeFunc::Parms; for (int j = 0; j < nargs; j++) { - const Type *targ = tf->domain()->field_at(j + TypeFunc::Parms); + const Type *targ = tf->domain_sig()->field_at(j + TypeFunc::Parms); if( targ->basic_type() == T_DOUBLE ) { // If any parameters are doubles, they must be rounded before // the call, dstore_rounding does gvn.transform @@ -2231,10 +2243,10 @@ return; } const TypeFunc* tf = TypeFunc::make(dest_method); - int nargs = tf->domain()->cnt() - TypeFunc::Parms; + int nargs = tf->domain_sig()->cnt() - TypeFunc::Parms; int skip = Bytecodes::has_receiver(bc) ? 1 : 0; for (int j = skip, i = 0; j < nargs && i < TypeProfileArgsLimit; j++) { - const Type *targ = tf->domain()->field_at(j + TypeFunc::Parms); + const Type *targ = tf->domain_sig()->field_at(j + TypeFunc::Parms); if (targ->basic_type() == T_OBJECT || targ->basic_type() == T_ARRAY) { bool maybe_null = true; ciKlass* better_type = NULL; --- old/src/share/vm/opto/machnode.cpp 2016-12-13 09:36:30.564549122 +0100 +++ new/src/share/vm/opto/machnode.cpp 2016-12-13 09:36:30.504549275 +0100 @@ -693,7 +693,7 @@ const RegMask &MachCallNode::in_RegMask(uint idx) const { // Values in the domain use the users calling convention, embodied in the // _in_rms array of RegMasks. - if (idx < tf()->domain()->cnt()) { + if (idx < tf()->domain_sig()->cnt()) { return _in_rms[idx]; } if (idx == mach_constant_base_node_input()) { @@ -726,7 +726,7 @@ const RegMask &MachCallJavaNode::in_RegMask(uint idx) const { // Values in the domain use the users calling convention, embodied in the // _in_rms array of RegMasks. 
- if (idx < tf()->domain()->cnt()) { + if (idx < tf()->domain_cc()->cnt()) { return _in_rms[idx]; } if (idx == mach_constant_base_node_input()) { --- old/src/share/vm/opto/macro.cpp 2016-12-13 09:36:30.886548305 +0100 +++ new/src/share/vm/opto/macro.cpp 2016-12-13 09:36:30.822548467 +0100 @@ -71,8 +71,8 @@ void PhaseMacroExpand::copy_call_debug_info(CallNode *oldcall, CallNode * newcall) { // Copy debug information and adjust JVMState information - uint old_dbg_start = oldcall->tf()->domain()->cnt(); - uint new_dbg_start = newcall->tf()->domain()->cnt(); + uint old_dbg_start = oldcall->tf()->domain_sig()->cnt(); + uint new_dbg_start = newcall->tf()->domain_sig()->cnt(); int jvms_adj = new_dbg_start - old_dbg_start; assert (new_dbg_start == newcall->req(), "argument count mismatch"); --- old/src/share/vm/opto/macroArrayCopy.cpp 2016-12-13 09:36:31.222547452 +0100 +++ new/src/share/vm/opto/macroArrayCopy.cpp 2016-12-13 09:36:31.155547622 +0100 @@ -70,7 +70,7 @@ Node* parm2, Node* parm3, Node* parm4, Node* parm5, Node* parm6, Node* parm7) { - int size = call_type->domain()->cnt(); + int size = call_type->domain_sig()->cnt(); Node* call = new CallLeafNoFPNode(call_type, call_addr, call_name, adr_type); call->init_req(TypeFunc::Control, ctrl); call->init_req(TypeFunc::I_O , top()); --- old/src/share/vm/opto/matcher.cpp 2016-12-13 09:36:31.547546627 +0100 +++ new/src/share/vm/opto/matcher.cpp 2016-12-13 09:36:31.484546787 +0100 @@ -201,7 +201,7 @@ // Need the method signature to determine the incoming argument types, // because the types determine which registers the incoming arguments are // in, and this affects the matched code. - const TypeTuple *domain = C->tf()->domain(); + const TypeTuple *domain = C->tf()->domain_cc(); uint argcnt = domain->cnt() - TypeFunc::Parms; BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, argcnt ); VMRegPair *vm_parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt ); @@ -716,7 +716,7 @@ } // Next unused projection number from Start. 
- int proj_cnt = C->tf()->domain()->cnt(); + int proj_cnt = C->tf()->domain_cc()->cnt(); // Do all the save-on-entry registers. Make projections from Start for // them, and give them a use at the exit points. To the allocator, they @@ -1177,7 +1177,7 @@ bool is_method_handle_invoke = false; // for special kill effects if( sfpt->is_Call() ) { call = sfpt->as_Call(); - domain = call->tf()->domain(); + domain = call->tf()->domain_cc(); cnt = domain->cnt(); // Match just the call, nothing else @@ -1305,12 +1305,14 @@ } // Grab first register, adjust stack slots and insert in mask. OptoReg::Name reg1 = warp_outgoing_stk_arg(parm_regs[i].first(), begin_out_arg_area, out_arg_limit_per_call ); - if (OptoReg::is_valid(reg1)) + if (OptoReg::is_valid(reg1)) { rm->Insert( reg1 ); + } // Grab second register (if any), adjust stack slots and insert in mask. OptoReg::Name reg2 = warp_outgoing_stk_arg(parm_regs[i].second(), begin_out_arg_area, out_arg_limit_per_call ); - if (OptoReg::is_valid(reg2)) + if (OptoReg::is_valid(reg2)) { rm->Insert( reg2 ); + } } // End of for all arguments // Compute number of stack slots needed to restore stack in case of @@ -1351,7 +1353,7 @@ // Debug inputs begin just after the last incoming parameter assert((mcall == NULL) || (mcall->jvms() == NULL) || - (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), ""); + (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain_cc()->cnt()), ""); // Move the OopMap msfpt->_oop_map = sfpt->_oop_map; --- old/src/share/vm/opto/parse1.cpp 2016-12-13 09:36:31.889545758 +0100 +++ new/src/share/vm/opto/parse1.cpp 2016-12-13 09:36:31.826545918 +0100 @@ -788,17 +788,52 @@ } } +// Helper function to create a ValueTypeNode from its fields passed as +// arguments. Fields are passed in order of increasing offsets. 
+static Node* create_vt_node(StartNode* start, ciValueKlass* vk, ciValueKlass* base_vk, int base_offset, int base_input, Compile* C) { + assert(base_offset >= 0, "offset in value type always positive"); + PhaseGVN& gvn = *C->initial_gvn(); + ValueTypeNode* vt = ValueTypeNode::make(gvn, vk); + for (uint i = 0; i < vt->field_count(); i++) { + ciType* field_type = vt->get_field_type(i); + int offset = base_offset + vt->get_field_offset(i) - (base_offset > 0 ? vk->first_field_offset() : 0); + if (field_type->is_valuetype()) { + ciValueKlass* embedded_vk = field_type->as_value_klass(); + Node* embedded_vt = create_vt_node(start, embedded_vk, base_vk, offset, base_input, C); + vt->set_field_value(i, embedded_vt); + } else { + int j = 0; int extra = 0; + for (; j < base_vk->nof_nonstatic_fields(); j++) { + ciField* f = base_vk->nonstatic_field_at(j); + if (offset == f->offset()) { + assert(f->type() == field_type, "inconsistent field type"); + break; + } + BasicType bt = f->type()->basic_type(); + if (bt == T_LONG || bt == T_DOUBLE) { + extra++; + } + } + assert(j != base_vk->nof_nonstatic_fields(), "must find"); + Node* parm = gvn.transform(new ParmNode(start, base_input + j + extra)); + vt->set_field_value(i, parm); + // Record all these guys for later GVN. + C->record_for_igvn(parm); + } + } + return gvn.transform(vt); +} //----------------------------build_start_state------------------------------- // Construct a state which contains only the incoming arguments from an // unknown caller. The method & bci will be NULL & InvocationEntryBci. 
JVMState* Compile::build_start_state(StartNode* start, const TypeFunc* tf) { - int arg_size = tf->domain()->cnt(); - int max_size = MAX2(arg_size, (int)tf->range()->cnt()); + int arg_size_sig = tf->domain_sig()->cnt(); + int max_size = MAX2(arg_size_sig, (int)tf->range()->cnt()); JVMState* jvms = new (this) JVMState(max_size - TypeFunc::Parms); SafePointNode* map = new SafePointNode(max_size, NULL); record_for_igvn(map); - assert(arg_size == TypeFunc::Parms + (is_osr_compilation() ? 1 : method()->arg_size()), "correct arg_size"); + assert(arg_size_sig == TypeFunc::Parms + (is_osr_compilation() ? 1 : method()->arg_size()), "correct arg_size"); Node_Notes* old_nn = default_node_notes(); if (old_nn != NULL && has_method()) { Node_Notes* entry_nn = old_nn->clone(this); @@ -808,21 +843,59 @@ entry_nn->set_jvms(entry_jvms); set_default_node_notes(entry_nn); } - uint i; - for (i = 0; i < (uint)arg_size; i++) { - PhaseGVN& gvn = *initial_gvn(); - Node* parm = gvn.transform(new ParmNode(start, i)); - // Check if parameter is a value type pointer - if (gvn.type(parm)->isa_valuetypeptr()) { - // Create ValueTypeNode from the oop and replace the parameter - parm = ValueTypeNode::make(gvn, map->memory(), parm); - } - map->init_req(i, parm); - // Record all these guys for later GVN. - record_for_igvn(parm); + PhaseGVN& gvn = *initial_gvn(); + uint j = 0; + for (uint i = 0; i < (uint)arg_size_sig; i++) { + assert(j >= i, "less actual arguments than in the signature?"); + if (ValueTypePassFieldsAsArgs) { + if (i < TypeFunc::Parms) { + assert(i == j, "no change before the actual arguments"); + Node* parm = gvn.transform(new ParmNode(start, i)); + map->init_req(i, parm); + // Record all these guys for later GVN. + record_for_igvn(parm); + j++; + } else { + // Value type arguments are not passed by reference: we get an + // argument per field of the value type. Build ValueTypeNodes + // from the value type arguments. 
+ const Type* t = tf->domain_sig()->field_at(i); + if (t->isa_valuetypeptr()) { + ciValueKlass* vk = t->is_valuetypeptr()->value_type()->value_klass(); + Node* vt = create_vt_node(start, vk, vk, 0, j, C); + map->init_req(i, gvn.transform(vt)); + int extra = 0; + for (int k = 0; k < vk->nof_nonstatic_fields(); k++) { + ciField* f = vk->nonstatic_field_at(k); + BasicType bt = f->type()->basic_type(); + if (bt == T_LONG || bt == T_DOUBLE) { + extra++; + } + } + j += extra + vk->nof_nonstatic_fields(); + } else { + Node* parm = gvn.transform(new ParmNode(start, j)); + map->init_req(i, parm); + // Record all these guys for later GVN. + record_for_igvn(parm); + j++; + } + } + } else { + Node* parm = gvn.transform(new ParmNode(start, i)); + // Check if parameter is a value type pointer + if (gvn.type(parm)->isa_valuetypeptr()) { + // Create ValueTypeNode from the oop and replace the parameter + parm = ValueTypeNode::make(gvn, map->memory(), parm); + } + map->init_req(i, parm); + // Record all these guys for later GVN. 
+ record_for_igvn(parm); + j++; + } } - for (; i < map->req(); i++) { - map->init_req(i, top()); + for (; j < map->req(); j++) { + map->init_req(j, top()); } assert(jvms->argoff() == TypeFunc::Parms, "parser gets arguments here"); set_default_node_notes(old_nn); @@ -1161,7 +1234,7 @@ assert(merged_memory(), ""); // Now add the locals which are initially bound to arguments: - uint arg_size = tf()->domain()->cnt(); + uint arg_size = tf()->domain_sig()->cnt(); ensure_stack(arg_size - TypeFunc::Parms); // OSR methods have funny args for (i = TypeFunc::Parms; i < arg_size; i++) { map()->init_req(i, inmap->argument(_caller, i - TypeFunc::Parms)); --- old/src/share/vm/opto/runtime.cpp 2016-12-13 09:36:32.237544875 +0100 +++ new/src/share/vm/opto/runtime.cpp 2016-12-13 09:36:32.169545047 +0100 @@ -615,7 +615,7 @@ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields); - return TypeFunc::make(domain,range); + return TypeFunc::make(domain, range); } @@ -1202,7 +1202,7 @@ fields = TypeTuple::fields(1); fields[TypeFunc::Parms+0] = NULL; // void const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields); - return TypeFunc::make(domain,range); + return TypeFunc::make(domain, range); } JRT_LEAF(void, OptoRuntime::profile_receiver_type_C(DataLayout* data, oopDesc* receiver)) @@ -1513,7 +1513,7 @@ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields); - return TypeFunc::make(domain,range); + return TypeFunc::make(domain, range); } @@ -1531,7 +1531,7 @@ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields); - return TypeFunc::make(domain,range); + return TypeFunc::make(domain, range); } const TypeFunc *OptoRuntime::dtrace_object_alloc_Type() { @@ -1547,7 +1547,7 @@ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields); - return TypeFunc::make(domain,range); + return TypeFunc::make(domain, range); } --- old/src/share/vm/opto/type.cpp 2016-12-13 09:36:32.564544044 +0100 +++ new/src/share/vm/opto/type.cpp 2016-12-13 
09:36:32.502544202 +0100 @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "ci/ciField.hpp" #include "ci/ciMethodData.hpp" #include "ci/ciTypeFlow.hpp" #include "ci/ciValueKlass.hpp" @@ -1811,23 +1812,57 @@ return (TypeTuple*)(new TypeTuple(TypeFunc::Parms + arg_cnt, field_array))->hashcons(); } +static void collect_value_fields(ciValueKlass* vk, const Type**& field_array, uint& pos) { + for (int j = 0; j < vk->nof_nonstatic_fields(); j++) { + ciField* f = vk->nonstatic_field_at(j); + BasicType bt = f->type()->basic_type(); + assert(bt < T_VALUETYPE && bt >= T_BOOLEAN, "not yet supported"); + field_array[pos++] = Type::get_const_type(f->type()); + if (bt == T_LONG || bt == T_DOUBLE) { + field_array[pos++] = Type::HALF; + } + } +} + // Make a TypeTuple from the domain of a method signature -const TypeTuple *TypeTuple::make_domain(ciInstanceKlass* recv, ciSignature* sig) { +const TypeTuple *TypeTuple::make_domain(ciInstanceKlass* recv, ciSignature* sig, bool vt_fields_as_args) { uint arg_cnt = sig->size(); + int vt_extra = 0; + if (vt_fields_as_args) { + for (int i = 0; i < sig->count(); i++) { + ciType* type = sig->type_at(i); + if (type->basic_type() == T_VALUETYPE) { + assert(type->is_valuetype(), "inconsistent type"); + ciValueKlass* vk = (ciValueKlass*)type; + vt_extra += vk->extra_value_args(); + } + } + assert(((int)arg_cnt) + vt_extra >= 0, "negative number of actual arguments?"); + } + uint pos = TypeFunc::Parms; const Type **field_array; if (recv != NULL) { arg_cnt++; - field_array = fields(arg_cnt); + if (vt_fields_as_args && recv->is_valuetype()) { + ciValueKlass* vk = (ciValueKlass*)recv; + vt_extra += vk->extra_value_args(); + } + field_array = fields(arg_cnt + vt_extra); // Use get_const_type here because it respects UseUniqueSubclasses: - field_array[pos++] = get_const_type(recv)->join_speculative(TypePtr::NOTNULL); + if (vt_fields_as_args && recv->is_valuetype()) { + ciValueKlass* vk = (ciValueKlass*)recv; + collect_value_fields(vk, 
field_array, pos); + } else { + field_array[pos++] = get_const_type(recv)->join_speculative(TypePtr::NOTNULL); + } } else { - field_array = fields(arg_cnt); + field_array = fields(arg_cnt + vt_extra); } int i = 0; - while (pos < TypeFunc::Parms + arg_cnt) { + while (pos < TypeFunc::Parms + arg_cnt + vt_extra) { ciType* type = sig->type_at(i); switch (type->basic_type()) { @@ -1840,7 +1875,6 @@ field_array[pos++] = Type::HALF; break; case T_OBJECT: - case T_VALUETYPE: case T_ARRAY: case T_BOOLEAN: case T_CHAR: @@ -1850,13 +1884,24 @@ case T_INT: field_array[pos++] = get_const_type(type); break; + case T_VALUETYPE: { + assert(type->is_valuetype(), "inconsistent type"); + if (vt_fields_as_args) { + ciValueKlass* vk = (ciValueKlass*)type; + collect_value_fields(vk, field_array, pos); + } else { + field_array[pos++] = get_const_type(type); + } + break; + } default: ShouldNotReachHere(); } i++; } + assert(pos == TypeFunc::Parms + arg_cnt + vt_extra, "wrong number of arguments"); - return (TypeTuple*)(new TypeTuple(TypeFunc::Parms + arg_cnt, field_array))->hashcons(); + return (TypeTuple*)(new TypeTuple(TypeFunc::Parms + arg_cnt + vt_extra, field_array))->hashcons(); } const TypeTuple *TypeTuple::make( uint cnt, const Type **fields ) { @@ -5385,8 +5430,12 @@ // Convenience common pre-built types. 
//------------------------------make------------------------------------------- +const TypeFunc *TypeFunc::make( const TypeTuple *domain_sig, const TypeTuple* domain_cc, const TypeTuple *range ) { + return (TypeFunc*)(new TypeFunc(domain_sig, domain_cc, range))->hashcons(); +} + const TypeFunc *TypeFunc::make( const TypeTuple *domain, const TypeTuple *range ) { - return (TypeFunc*)(new TypeFunc(domain,range))->hashcons(); + return make(domain, domain, range); } //------------------------------make------------------------------------------- @@ -5394,14 +5443,22 @@ Compile* C = Compile::current(); const TypeFunc* tf = C->last_tf(method); // check cache if (tf != NULL) return tf; // The hit rate here is almost 50%. - const TypeTuple *domain; + const TypeTuple *domain_sig, *domain_cc; + // Value type arguments are not passed by reference, instead each + // field of the value type is passed as an argument. We maintain 2 + // views of the argument list here: one based on the signature (with + // a value type argument as a single slot), one based on the actual + // calling convention (with a value type argument as a list of its + // fields). 
if (method->is_static()) { - domain = TypeTuple::make_domain(NULL, method->signature()); + domain_sig = TypeTuple::make_domain(NULL, method->signature(), false); + domain_cc = TypeTuple::make_domain(NULL, method->signature(), ValueTypePassFieldsAsArgs); } else { - domain = TypeTuple::make_domain(method->holder(), method->signature()); + domain_sig = TypeTuple::make_domain(method->holder(), method->signature(), false); + domain_cc = TypeTuple::make_domain(method->holder(), method->signature(), ValueTypePassFieldsAsArgs); } const TypeTuple *range = TypeTuple::make_range(method->signature()); - tf = TypeFunc::make(domain, range); + tf = TypeFunc::make(domain_sig, domain_cc, range); C->set_last_tf(method, tf); // fill cache return tf; } @@ -5437,14 +5494,15 @@ // Structural equality check for Type representations bool TypeFunc::eq( const Type *t ) const { const TypeFunc *a = (const TypeFunc*)t; - return _domain == a->_domain && + return _domain_sig == a->_domain_sig && + _domain_cc == a->_domain_cc && _range == a->_range; } //------------------------------hash------------------------------------------- // Type-specific hashing function. 
int TypeFunc::hash(void) const { - return (intptr_t)_domain + (intptr_t)_range; + return (intptr_t)_domain_sig + (intptr_t)_domain_cc + (intptr_t)_range; } //------------------------------dump2------------------------------------------ @@ -5468,11 +5526,11 @@ return; } d.Insert((void*)this,(void*)this); // Stop recursion - if (Parms < _domain->cnt()) - _domain->field_at(Parms)->dump2(d,depth-1,st); - for (uint i = Parms+1; i < _domain->cnt(); i++) { + if (Parms < _domain_sig->cnt()) + _domain_sig->field_at(Parms)->dump2(d,depth-1,st); + for (uint i = Parms+1; i < _domain_sig->cnt(); i++) { st->print(", "); - _domain->field_at(i)->dump2(d,depth-1,st); + _domain_sig->field_at(i)->dump2(d,depth-1,st); } st->print(" )"); } --- old/src/share/vm/opto/type.hpp 2016-12-13 09:36:32.928543120 +0100 +++ new/src/share/vm/opto/type.hpp 2016-12-13 09:36:32.864543283 +0100 @@ -649,7 +649,7 @@ static const TypeTuple *make( uint cnt, const Type **fields ); static const TypeTuple *make_range(ciSignature *sig); - static const TypeTuple *make_domain(ciInstanceKlass* recv, ciSignature *sig); + static const TypeTuple *make_domain(ciInstanceKlass* recv, ciSignature *sig, bool vt_fields_as_args = false); // Subroutine call type with space allocated for argument types // Memory for Control, I_O, Memory, FramePtr, and ReturnAdr is allocated implicitly @@ -1532,13 +1532,20 @@ //------------------------------TypeFunc--------------------------------------- // Class of Array Types class TypeFunc : public Type { - TypeFunc( const TypeTuple *domain, const TypeTuple *range ) : Type(Function), _domain(domain), _range(range) {} + TypeFunc(const TypeTuple *domain_sig, const TypeTuple *domain_cc, const TypeTuple *range) : Type(Function), _domain_sig(domain_sig), _domain_cc(domain_cc), _range(range) {} virtual bool eq( const Type *t ) const; virtual int hash() const; // Type specific hashing virtual bool singleton(void) const; // TRUE if type is a singleton virtual bool empty(void) const; // TRUE if 
type is vacuous - const TypeTuple* const _domain; // Domain of inputs + // Domains of inputs: value type arguments are not passed by + // reference, instead each field of the value type is passed as an + // argument. We maintain 2 views of the argument list here: one + // based on the signature (with a value type argument as a single + // slot), one based on the actual calling convention (with a value + // type argument as a list of its fields). + const TypeTuple* const _domain_sig; + const TypeTuple* const _domain_cc; const TypeTuple* const _range; // Range of results public: @@ -1553,11 +1560,13 @@ // Accessors: - const TypeTuple* domain() const { return _domain; } + const TypeTuple* domain_sig() const { return _domain_sig; } + const TypeTuple* domain_cc() const { return _domain_cc; } const TypeTuple* range() const { return _range; } static const TypeFunc *make(ciMethod* method); static const TypeFunc *make(ciSignature signature, const Type* extra); + static const TypeFunc *make(const TypeTuple* domain, const TypeTuple* domain_cc, const TypeTuple* range); static const TypeFunc *make(const TypeTuple* domain, const TypeTuple* range); virtual const Type *xmeet( const Type *t ) const; --- old/src/share/vm/opto/valuetypenode.cpp 2016-12-13 09:36:33.304542166 +0100 +++ new/src/share/vm/opto/valuetypenode.cpp 2016-12-13 09:36:33.234542343 +0100 @@ -29,7 +29,7 @@ #include "opto/valuetypenode.hpp" #include "opto/phaseX.hpp" -Node* ValueTypeNode::make(PhaseGVN& gvn, ciValueKlass* klass) { +ValueTypeNode* ValueTypeNode::make(PhaseGVN& gvn, ciValueKlass* klass) { // Create a new ValueTypeNode with uninitialized values and NULL oop const TypeValueType* type = TypeValueType::make(klass); return new ValueTypeNode(type, gvn.zerocon(T_VALUETYPE)); @@ -298,7 +298,7 @@ void ValueTypeNode::make_scalar_in_safepoints(Compile* C) { const TypeValueTypePtr* res_type = TypeValueTypePtr::make(bottom_type()->isa_valuetype(), TypePtr::NotNull); ciValueKlass* vk = value_klass(); - uint 
nfields = vk->field_count(); + uint nfields = vk->flattened_field_count(); for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { Node* u = fast_out(i); if (u->is_SafePoint() && (!u->is_Call() || u->as_Call()->has_debug_use(this))) { @@ -337,6 +337,44 @@ } } +uint ValueTypeNode::set_arguments_for_java_call(CallJavaNode* call, int base_input, const GraphKit& kit, ciValueKlass* base_vk, int base_offset) { + ciValueKlass* vk = value_klass(); + if (base_vk == NULL) { + base_vk = vk; + } + uint edges = 0; + for (uint i = 0; i < field_count(); i++) { + ciType* field_type = get_field_type(i); + int offset = base_offset + get_field_offset(i) - (base_offset > 0 ? vk->first_field_offset() : 0); + Node* arg = get_field_value(i); + if (field_type->is_valuetype()) { + ciValueKlass* embedded_vk = field_type->as_value_klass(); + edges += arg->as_ValueType()->set_arguments_for_java_call(call, base_input, kit, base_vk, offset); + } else { + int j = 0; int extra = 0; + for (; j < base_vk->nof_nonstatic_fields(); j++) { + ciField* f = base_vk->nonstatic_field_at(j); + if (offset == f->offset()) { + assert(f->type() == field_type, "inconsistent field type"); + break; + } + BasicType bt = f->type()->basic_type(); + if (bt == T_LONG || bt == T_DOUBLE) { + extra++; + } + } + call->init_req(base_input + j + extra, arg); + edges++; + BasicType bt = field_type->basic_type(); + if (bt == T_LONG || bt == T_DOUBLE) { + call->init_req(base_input + j + extra + 1, kit.top()); + edges++; + } + } + } + return edges; +} + Node* ValueTypeNode::Ideal(PhaseGVN* phase, bool can_reshape) { // No optimizations for now return NULL; --- old/src/share/vm/opto/valuetypenode.hpp 2016-12-13 09:36:33.631541335 +0100 +++ new/src/share/vm/opto/valuetypenode.hpp 2016-12-13 09:36:33.570541490 +0100 @@ -56,7 +56,7 @@ public: // Create a new ValueTypeNode with uninitialized values - static Node* make(PhaseGVN& gvn, ciValueKlass* klass); + static ValueTypeNode* make(PhaseGVN& gvn, ciValueKlass* klass); // 
Create a new ValueTypeNode and load its values from an oop static Node* make(PhaseGVN& gvn, Node* mem, Node* oop); // Create a new ValueTypeNode and load its values from a flattened value type field @@ -86,7 +86,7 @@ // Replace ValueTypeNodes in debug info at safepoints with SafePointScalarObjectNodes void make_scalar_in_safepoints(Compile* C); - + uint set_arguments_for_java_call(CallJavaNode* call, int base_input, const GraphKit& kit, ciValueKlass* base_vk = NULL, int base_offset = 0); virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); virtual int Opcode() const; --- old/src/share/vm/runtime/globals.hpp 2016-12-13 09:36:33.964540490 +0100 +++ new/src/share/vm/runtime/globals.hpp 2016-12-13 09:36:33.886540688 +0100 @@ -4186,8 +4186,10 @@ diagnostic(bool, CompilerDirectivesPrint, false, \ "Print compiler directives on installation.") \ diagnostic(int, CompilerDirectivesLimit, 50, \ - "Limit on number of compiler directives.") - + "Limit on number of compiler directives.") \ + \ + experimental(bool, ValueTypePassFieldsAsArgs, true, \ + "Pass each field as an argument at calls") \ /* * Macros for factoring of globals --- old/src/share/vm/runtime/sharedRuntime.cpp 2016-12-13 09:36:34.320539586 +0100 +++ new/src/share/vm/runtime/sharedRuntime.cpp 2016-12-13 09:36:34.255539751 +0100 @@ -39,7 +39,10 @@ #include "interpreter/interpreterRuntime.hpp" #include "logging/log.hpp" #include "memory/universe.inline.hpp" +#include "oops/fieldStreams.hpp" +#include "oops/objArrayOop.inline.hpp" #include "oops/oop.inline.hpp" +#include "oops/valueKlass.hpp" #include "prims/forte.hpp" #include "prims/jvmtiExport.hpp" #include "prims/jvmtiRedefineClassesTrace.hpp" @@ -1155,7 +1158,8 @@ bool has_receiver = bc != Bytecodes::_invokestatic && bc != Bytecodes::_invokedynamic && - bc != Bytecodes::_invokehandle; + bc != Bytecodes::_invokehandle && + bc != Bytecodes::_invokedirect; // Find receiver for non-static call if (has_receiver) { @@ -1367,9 +1371,9 @@ #endif if (is_virtual) { - 
assert(receiver.not_null() || invoke_code == Bytecodes::_invokehandle, "sanity check"); + assert(receiver.not_null() || invoke_code == Bytecodes::_invokehandle || invoke_code == Bytecodes::_invokedirect, "sanity check"); bool static_bound = call_info.resolved_method()->can_be_statically_bound(); - KlassHandle h_klass(THREAD, invoke_code == Bytecodes::_invokehandle ? NULL : receiver->klass()); + KlassHandle h_klass(THREAD, (invoke_code == Bytecodes::_invokehandle || invoke_code == Bytecodes::_invokedirect) ? NULL : receiver->klass()); CompiledIC::compute_monomorphic_entry(callee_method, h_klass, is_optimized, static_bound, virtual_call_info, CHECK_(methodHandle())); @@ -2499,6 +2503,61 @@ return _adapters->new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); } +// Value type arguments are not passed by reference, instead each +// field of the value type is passed as an argument. This helper +// function collects the fields of the value types (including embedded +// value type's fields) in a list. Included with the field's type is +// the offset of each field in the value type: i2c and c2i adapters +// need that to load or store fields. Finally, the list of fields is +// sorted in order of increasing offsets: the adapters and the +// compiled code need an agreed upon order of fields. +// +// The list of basic types that is returned starts with a T_VALUETYPE +// and ends with an extra T_VOID. T_VALUETYPE/T_VOID are used as +// delimiters. Every entry between the two is a field of the value +// type. If there's an embedded value type in the list, it also starts +// with a T_VALUETYPE and ends with a T_VOID. 
This is so we can +// generate a unique fingerprint for the method's adapters and we can +// generate the list of basic types from the interpreter point of view +// (value types passed as reference: iterate on the list until a +// T_VALUETYPE, drop everything until and including the closing +// T_VOID) or the compiler point of view (each field of the value +// types is an argument: drop all T_VALUETYPE/T_VOID from the list). +static GrowableArray collect_fields(ValueKlass* vk, int base_off = 0) { + GrowableArray sig_extended; + sig_extended.push(SigEntry(T_VALUETYPE, base_off)); + for (JavaFieldStream fs(vk); !fs.done(); fs.next()) { + if (fs.access_flags().is_static()) continue; + fieldDescriptor& fd = fs.field_descriptor(); + BasicType bt = fd.field_type(); + int offset = base_off + fd.offset() - (base_off > 0 ? vk->first_field_offset() : 0); + if (bt == T_VALUETYPE) { + Symbol* signature = fd.signature(); + JavaThread* THREAD = JavaThread::current(); + oop loader = vk->class_loader(); + oop protection_domain = vk->protection_domain(); + Klass* klass = SystemDictionary::resolve_or_null(signature, + Handle(THREAD, loader), Handle(THREAD, protection_domain), + THREAD); + assert(klass != NULL && !HAS_PENDING_EXCEPTION, "lookup shouldn't fail"); + const GrowableArray& embedded = collect_fields(ValueKlass::cast(klass), offset); + sig_extended.appendAll(&embedded); + } else { + sig_extended.push(SigEntry(bt, offset)); + if (bt == T_LONG || bt == T_DOUBLE) { + sig_extended.push(SigEntry(T_VOID, offset)); + } + } + } + int offset = base_off + vk->size_helper()*HeapWordSize - (base_off > 0 ? 
vk->first_field_offset() : 0); + sig_extended.push(SigEntry(T_VOID, offset)); // hack: use T_VOID to mark end of value type fields + if (base_off == 0) { + sig_extended.sort(SigEntry::compare); + } + assert(sig_extended.at(0)._bt == T_VALUETYPE && sig_extended.at(sig_extended.length()-1)._bt == T_VOID, "broken structure"); + return sig_extended; +} + AdapterHandlerEntry* AdapterHandlerLibrary::get_adapter(const methodHandle& method) { // Use customized signature handler. Need to lock around updates to // the AdapterHandlerTable (it is not safe for concurrent readers @@ -2507,7 +2566,7 @@ ResourceMark rm; - NOT_PRODUCT(int insts_size); + NOT_PRODUCT(int insts_size = 0); AdapterBlob* new_adapter = NULL; AdapterHandlerEntry* entry = NULL; AdapterFingerPrint* fingerprint = NULL; @@ -2530,22 +2589,78 @@ } // Fill in the signature array, for the calling-convention call. - int total_args_passed = method->size_of_parameters(); // All args on stack + GrowableArray sig_extended; + { + MutexUnlocker mul(AdapterHandlerLibrary_lock); + Thread* THREAD = Thread::current(); + Handle class_loader(THREAD, method->method_holder()->class_loader()); + Handle protection_domain(THREAD, method->method_holder()->protection_domain()); + GrowableArray sig_bt_tmp; + int value_klasses = 0; + + int i = 0; + if (!method->is_static()) { // Pass in receiver first + Klass* holder = method->method_holder(); + if (ValueTypePassFieldsAsArgs && holder->is_value()) { + value_klasses++; + ValueKlass* vk = ValueKlass::cast(holder); + const GrowableArray& sig_vk = collect_fields(vk); + sig_extended.appendAll(&sig_vk); + } else { + sig_extended.push(SigEntry(T_OBJECT)); + } + } + for (SignatureStream ss(method->signature()); !ss.at_return_type(); ss.next()) { + if (ValueTypePassFieldsAsArgs && ss.type() == T_VALUETYPE) { + value_klasses++; + Klass* k = ss.as_klass(class_loader, protection_domain, SignatureStream::ReturnNull, THREAD); + assert(k != NULL && !HAS_PENDING_EXCEPTION, "can resolve klass?"); + 
ValueKlass* vk = ValueKlass::cast(k); + const GrowableArray& sig_vk = collect_fields(vk); + sig_extended.appendAll(&sig_vk); + } else { + sig_extended.push(SigEntry(ss.type())); + if (ss.type() == T_LONG || ss.type() == T_DOUBLE) { + sig_extended.push(SigEntry(T_VOID)); + } + } + } + } - BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); - VMRegPair* regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); - int i = 0; - if (!method->is_static()) // Pass in receiver first - sig_bt[i++] = T_OBJECT; - for (SignatureStream ss(method->signature()); !ss.at_return_type(); ss.next()) { - sig_bt[i++] = ss.type(); // Collect remaining bits of signature - if (ss.type() == T_LONG || ss.type() == T_DOUBLE) - sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots + int values = 0; + if (ValueTypePassFieldsAsArgs) { + for (int i = 0; i < sig_extended.length(); i++) { + if (sig_extended.at(i)._bt == T_VALUETYPE) { + values++; + } + } } - assert(i == total_args_passed, ""); + int total_args_passed_cc = sig_extended.length() - 2 * values; + BasicType* sig_bt_cc = NEW_RESOURCE_ARRAY(BasicType, total_args_passed_cc); + + int j = 0; + for (int i = 0; i < sig_extended.length(); i++) { + if (!ValueTypePassFieldsAsArgs) { + sig_bt_cc[j++] = sig_extended.at(i)._bt; + } else if (sig_extended.at(i)._bt != T_VALUETYPE && + (sig_extended.at(i)._bt != T_VOID || + sig_extended.at(i-1)._bt == T_LONG || + sig_extended.at(i-1)._bt == T_DOUBLE)) { + sig_bt_cc[j++] = sig_extended.at(i)._bt; + } + } + assert(j == total_args_passed_cc, "bad number of arguments"); + + int total_args_passed_fp = sig_extended.length(); + BasicType* sig_bt_fp = NEW_RESOURCE_ARRAY(BasicType, total_args_passed_fp); + for (int i = 0; i < sig_extended.length(); i++) { + sig_bt_fp[i] = sig_extended.at(i)._bt; + } + + VMRegPair* regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed_cc); // Lookup method signature's fingerprint - entry = _adapters->lookup(total_args_passed, sig_bt); + entry = 
_adapters->lookup(total_args_passed_fp, sig_bt_fp); #ifdef ASSERT AdapterHandlerEntry* shared_entry = NULL; @@ -2561,10 +2676,10 @@ } // Get a description of the compiled java calling convention and the largest used (VMReg) stack slot usage - int comp_args_on_stack = SharedRuntime::java_calling_convention(sig_bt, regs, total_args_passed, false); + int comp_args_on_stack = SharedRuntime::java_calling_convention(sig_bt_cc, regs, total_args_passed_cc, false); // Make a C heap allocated version of the fingerprint to store in the adapter - fingerprint = new AdapterFingerPrint(total_args_passed, sig_bt); + fingerprint = new AdapterFingerPrint(total_args_passed_fp, sig_bt_fp); // StubRoutines::code2() is initialized after this function can be called. As a result, // VerifyAdapterCalls and VerifyAdapterSharing can fail if we re-use code that generated @@ -2580,13 +2695,14 @@ buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, sizeof(buffer_locs)/sizeof(relocInfo)); + MacroAssembler _masm(&buffer); entry = SharedRuntime::generate_i2c2i_adapters(&_masm, - total_args_passed, comp_args_on_stack, - sig_bt, + sig_extended, regs, - fingerprint); + fingerprint, + new_adapter); #ifdef ASSERT if (VerifyAdapterSharing) { if (shared_entry != NULL) { @@ -2600,7 +2716,6 @@ } #endif - new_adapter = AdapterBlob::create(&buffer); NOT_PRODUCT(insts_size = buffer.insts_size()); } if (new_adapter == NULL) { @@ -3074,3 +3189,50 @@ return activation; } +// We are at a compiled code to interpreter call. We need backing +// buffers for all value type arguments. Allocate an object array to +// hold them (convenient because once we're done with it we don't have +// to worry about freeing it). 
+JRT_ENTRY(void, SharedRuntime::allocate_value_types(JavaThread* thread)) +{ + assert(ValueTypePassFieldsAsArgs, "no reason to call this"); + ResourceMark rm; + JavaThread* THREAD = thread; + vframeStream vfst(thread); + methodHandle caller(thread, vfst.method()); + int bci = vfst.bci(); + Bytecode_invoke bytecode(caller, bci); + methodHandle callee = bytecode.static_target(CHECK); + + int nb_slots = 0; + if (!callee->is_static() && callee->method_holder()->is_value()) { + nb_slots++; + } + Handle class_loader(THREAD, callee->method_holder()->class_loader()); + Handle protection_domain(THREAD, callee->method_holder()->protection_domain()); + for (SignatureStream ss(callee->signature()); !ss.at_return_type(); ss.next()) { + if (ss.type() == T_VALUETYPE) { + nb_slots++; + } + } + objArrayHandle array = ObjArrayKlass::cast(Universe::objectArrayKlassObj())->allocate(nb_slots, CHECK); + int i = 0; + if (!callee->is_static() && callee->method_holder()->is_value()) { + ValueKlass* vk = ValueKlass::cast(callee->method_holder()); + oop res = vk->allocate_instance(CHECK); + array->obj_at_put(i, res); + i++; + } + for (SignatureStream ss(callee->signature()); !ss.at_return_type(); ss.next()) { + if (ss.type() == T_VALUETYPE) { + Klass* k = ss.as_klass(class_loader, protection_domain, SignatureStream::ReturnNull, THREAD); + assert(k != NULL && !HAS_PENDING_EXCEPTION, "can't resolve klass"); + ValueKlass* vk = ValueKlass::cast(k); + oop res = vk->allocate_instance(CHECK); + array->obj_at_put(i, res); + i++; + } + } + thread->set_vm_result(array()); +} +JRT_END --- old/src/share/vm/runtime/sharedRuntime.hpp 2016-12-13 09:36:34.661538720 +0100 +++ new/src/share/vm/runtime/sharedRuntime.hpp 2016-12-13 09:36:34.601538872 +0100 @@ -38,6 +38,48 @@ class AdapterFingerPrint; class vframeStream; +// Used for adapter generation. One SigEntry is used per element of +// the signature of the method. Value type arguments are treated +// specially. See comment for collect_fields(). 
+class SigEntry VALUE_OBJ_CLASS_SPEC { + public: + BasicType _bt; + int _offset; + + SigEntry() + : _bt(T_ILLEGAL), _offset(-1) { + } + SigEntry(BasicType bt, int offset) + : _bt(bt), _offset(offset) {} + + SigEntry(BasicType bt) + : _bt(bt), _offset(-1) {} + + static int compare(SigEntry* e1, SigEntry* e2) { + if (e1->_offset != e2->_offset) { + return e1->_offset - e2->_offset; + } + assert((e1->_bt == T_LONG && (e2->_bt == T_LONG || e2->_bt == T_VOID)) || + (e1->_bt == T_DOUBLE && (e2->_bt == T_DOUBLE || e2->_bt == T_VOID)) || + e1->_bt == T_VALUETYPE || e2->_bt == T_VALUETYPE || e1->_bt == T_VOID || e2->_bt == T_VOID, "bad bt"); + if (e1->_bt == e2->_bt) { + assert(e1->_bt == T_VALUETYPE || e1->_bt == T_VOID, "only ones with duplicate offsets"); + return 0; + } + if (e1->_bt == T_VOID || + e2->_bt == T_VALUETYPE) { + return 1; + } + if (e1->_bt == T_VALUETYPE || + e2->_bt == T_VOID) { + return -1; + } + ShouldNotReachHere(); + return 0; + } +}; + + // Runtime is the base class for various runtime interfaces // (InterpreterRuntime, CompilerRuntime, etc.). It provides // shared functionality such as exception forwarding (C++ to @@ -431,16 +473,15 @@ // handshaking path with compiled code to keep the stack walking correct. 
static AdapterHandlerEntry* generate_i2c2i_adapters(MacroAssembler *_masm, - int total_args_passed, - int max_arg, - const BasicType *sig_bt, + int comp_args_on_stack, + const GrowableArray& sig_extended, const VMRegPair *regs, - AdapterFingerPrint* fingerprint); + AdapterFingerPrint* fingerprint, + AdapterBlob*& new_adapter); static void gen_i2c_adapter(MacroAssembler *_masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray& sig_extended, const VMRegPair *regs); // OSR support @@ -522,6 +563,7 @@ static address handle_wrong_method(JavaThread* thread); static address handle_wrong_method_abstract(JavaThread* thread); static address handle_wrong_method_ic_miss(JavaThread* thread); + static void allocate_value_types(JavaThread* thread); #ifndef PRODUCT --- old/test/compiler/valhalla/valuetypes/ValueTypeTestBench.java 2016-12-13 09:36:34.982537905 +0100 +++ new/test/compiler/valhalla/valuetypes/ValueTypeTestBench.java 2016-12-13 09:36:34.920538062 +0100 @@ -29,7 +29,10 @@ * @build compiler.valhalla.valuetypes.ValueTypeTestBench * @run main ClassFileInstaller sun.hotspot.WhiteBox * @run main ClassFileInstaller jdk.test.lib.Platform - * @run main/othervm -noverify -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * @run main/othervm -ea -noverify -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:-TieredCompilation compiler.valhalla.valuetypes.ValueTypeTestBench + * @run main/othervm -ea -noverify -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:+UnlockExperimentalVMOptions -XX:-ValueTypePassFieldsAsArgs * -XX:-TieredCompilation compiler.valhalla.valuetypes.ValueTypeTestBench */ @@ -46,9 +49,12 @@ import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Repeatable; import java.lang.reflect.Method; import java.util.ArrayList; +import java.util.Arrays; import java.util.Hashtable; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -89,7 +95,7 @@ @DontCompile public long hashInterpreted() { - return s + sf + x + y + c + v1.hash() + v2.hash() + v3.hash(); + return s + sf + x + y + c + v1.hashInterpreted() + v2.hashInterpreted() + v3.hashInterpreted(); } } @@ -113,6 +119,11 @@ public long hash() { return x + (b ? 0 : 1) + c; } + + @DontInline + public long hashInterpreted() { + return x + (b ? 0 : 1) + c; + } } public class ValueTypeTestBench { @@ -166,7 +177,8 @@ } // Return incoming value type without accessing fields - @Test(failOn = ALLOC + LOAD + STORE + TRAP) + @Test(valid = ValueTypePassFieldsAsArgsOn, match = {ALLOC, STORE}, matchCount = {1, 9}, failOn = LOAD + TRAP) + @Test(valid = ValueTypePassFieldsAsArgsOff, failOn = ALLOC + LOAD + STORE + TRAP) public MyValue1 test3(MyValue1 v) { return v; } @@ -214,7 +226,8 @@ // Create a value type in compiled code and pass it to // the interpreter via a call. 
- @Test(match = {ALLOC}, matchCount = {1}, failOn = LOAD + TRAP) + @Test(valid = ValueTypePassFieldsAsArgsOn, failOn = LOAD + TRAP + ALLOC) + @Test(valid = ValueTypePassFieldsAsArgsOff, match = {ALLOC}, matchCount = {1}, failOn = LOAD + TRAP) public long test6() { MyValue1 v = MyValue1.createInline(rI, rL); // Pass to interpreter @@ -259,7 +272,8 @@ } // Merge value types created from two branches - @Test(match = {ALLOC, STORE}, matchCount = {1, 9}, failOn = LOAD + TRAP) + @Test(valid = ValueTypePassFieldsAsArgsOn, match = {LOAD}, matchCount = {9}, failOn = TRAP + ALLOC + STORE) + @Test(valid = ValueTypePassFieldsAsArgsOff, match = {ALLOC, STORE}, matchCount = {1, 9}, failOn = LOAD + TRAP) public MyValue1 test9(boolean b) { MyValue1 v; if (b) { @@ -370,7 +384,8 @@ // Create a value type in a non-inlined method and then call a // non-inlined method on that value type. - @Test(failOn = (ALLOC + LOAD + STORE + TRAP)) + @Test(valid = ValueTypePassFieldsAsArgsOn, failOn = (ALLOC + STORE + TRAP), match = {LOAD}, matchCount = {9}) + @Test(valid = ValueTypePassFieldsAsArgsOff, failOn = (ALLOC + LOAD + STORE + TRAP)) public long test14() { MyValue1 v = MyValue1.createDontInline(rI, rL); return v.hashInterpreted(); @@ -384,7 +399,8 @@ // Create a value type in an inlined method and then call a // non-inlined method on that value type. - @Test(failOn = (LOAD + TRAP), match = {ALLOC}, matchCount = {1}) + @Test(valid = ValueTypePassFieldsAsArgsOn, failOn = (LOAD + TRAP + ALLOC)) + @Test(valid = ValueTypePassFieldsAsArgsOff, failOn = (LOAD + TRAP), match = {ALLOC}, matchCount = {1}) public long test15() { MyValue1 v = MyValue1.createInline(rI, rL); return v.hashInterpreted(); @@ -427,7 +443,8 @@ // Create a value type in compiled code and pass it to the // interpreter via a call. The value is live at the first call so // debug info should include a reference to all its fields. 
- @Test(match = {ALLOC}, matchCount = {1}, failOn = LOAD + TRAP) + @Test(valid = ValueTypePassFieldsAsArgsOn, failOn = ALLOC + LOAD + TRAP) + @Test(valid = ValueTypePassFieldsAsArgsOff, match = {ALLOC}, matchCount = {1}, failOn = LOAD + TRAP) public long test18() { MyValue1 v = MyValue1.createInline(rI, rL); v.hashInterpreted(); @@ -443,7 +460,8 @@ // Create a value type in compiled code and pass it to the // interpreter via a call. The value type is passed twice but // should only be allocated once. - @Test(match = {ALLOC}, matchCount = {1}, failOn = LOAD + TRAP) + @Test(valid = ValueTypePassFieldsAsArgsOn, failOn = ALLOC + LOAD + TRAP) + @Test(valid = ValueTypePassFieldsAsArgsOff, match = {ALLOC}, matchCount = {1}, failOn = LOAD + TRAP) public long test19() { MyValue1 v = MyValue1.createInline(rI, rL); return sumValue(v, v); @@ -464,12 +482,13 @@ // interpreter via a call. The value type is live at the uncommon // trap: verify that deoptimization causes the value type to be // correctly allocated. 
- @Test(match = {ALLOC}, matchCount = {1}, failOn = LOAD) + @Test(valid = ValueTypePassFieldsAsArgsOn, failOn = LOAD + ALLOC + STORE) + @Test(valid = ValueTypePassFieldsAsArgsOff, match = {ALLOC}, matchCount = {1}, failOn = LOAD) public long test20(boolean flag) { MyValue1 v = MyValue1.createInline(rI, rL); if (flag) { // uncommon trap - WHITE_BOX.deoptimizeMethod(tests.get("ValueTypeTestBench::test16")); + WHITE_BOX.deoptimizeMethod(tests.get("ValueTypeTestBench::test20")); } return v.hashInterpreted(); } @@ -552,10 +571,217 @@ Asserts.assertEQ(result, hash()); } + // Test interpreter to compiled code with various signatures + @Test(failOn = ALLOC + STORE + TRAP) + public long test24(MyValue2 v) { + return v.hash(); + } + + @DontCompile + public void test24_verifier(boolean warmup) { + MyValue2 v = MyValue2.createInline(rI, true); + long result = test24(v); + Asserts.assertEQ(result, v.hashInterpreted()); + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test25(int i1, MyValue2 v, int i2) { + return v.hash() + i1 - i2; + } + + @DontCompile + public void test25_verifier(boolean warmup) { + MyValue2 v = MyValue2.createInline(rI, true); + long result = test25(rI, v, 2*rI); + Asserts.assertEQ(result, v.hashInterpreted() - rI); + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test26(long l1, MyValue2 v, long l2) { + return v.hash() + l1 - l2; + } + + @DontCompile + public void test26_verifier(boolean warmup) { + MyValue2 v = MyValue2.createInline(rI, true); + long result = test26(rL, v, 2*rL); + Asserts.assertEQ(result, v.hashInterpreted() - rL); + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test27(int i, MyValue2 v, long l) { + return v.hash() + i + l; + } + + @DontCompile + public void test27_verifier(boolean warmup) { + MyValue2 v = MyValue2.createInline(rI, true); + long result = test27(rI, v, rL); + Asserts.assertEQ(result, v.hashInterpreted() + rL + rI); + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test28(long l, 
MyValue2 v, int i) { + return v.hash() + i + l; + } + + @DontCompile + public void test28_verifier(boolean warmup) { + MyValue2 v = MyValue2.createInline(rI, true); + long result = test28(rL, v, rI); + Asserts.assertEQ(result, v.hashInterpreted() + rL + rI); + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test29(long l, MyValue1 v1, int i, MyValue2 v2) { + return v1.hash() + i + l + v2.hash(); + } + + @DontCompile + public void test29_verifier(boolean warmup) { + MyValue1 v1 = MyValue1.createDontInline(rI, rL); + MyValue2 v2 = MyValue2.createInline(rI, true); + long result = test29(rL, v1, rI, v2); + Asserts.assertEQ(result, v1.hashInterpreted() + rL + rI + v2.hashInterpreted()); + } + + // Test compiled code to interpreter with various signatures + @DontCompile + public long test30_interp(MyValue2 v) { + return v.hash(); + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test30(MyValue2 v) { + return test30_interp(v); + } + + @DontCompile + public void test30_verifier(boolean warmup) { + MyValue2 v = MyValue2.createInline(rI, true); + long result = test30(v); + Asserts.assertEQ(result, v.hashInterpreted()); + } + + @DontCompile + public long test31_interp(int i1, MyValue2 v, int i2) { + return v.hash() + i1 - i2; + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test31(int i1, MyValue2 v, int i2) { + return test31_interp(i1, v, i2); + } + + @DontCompile + public void test31_verifier(boolean warmup) { + MyValue2 v = MyValue2.createInline(rI, true); + long result = test31(rI, v, 2*rI); + Asserts.assertEQ(result, v.hashInterpreted() - rI); + } + + @DontCompile + public long test32_interp(long l1, MyValue2 v, long l2) { + return v.hash() + l1 - l2; + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test32(long l1, MyValue2 v, long l2) { + return test32_interp(l1, v, l2); + } + + @DontCompile + public void test32_verifier(boolean warmup) { + MyValue2 v = MyValue2.createInline(rI, true); + long result = test32(rL, v, 2*rL); + 
Asserts.assertEQ(result, v.hashInterpreted() - rL); + } + + @DontCompile + public long test33_interp(int i, MyValue2 v, long l) { + return v.hash() + i + l; + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test33(int i, MyValue2 v, long l) { + return test33_interp(i, v, l); + } + + @DontCompile + public void test33_verifier(boolean warmup) { + MyValue2 v = MyValue2.createInline(rI, true); + long result = test33(rI, v, rL); + Asserts.assertEQ(result, v.hashInterpreted() + rL + rI); + } + + @DontCompile + public long test34_interp(long l, MyValue2 v, int i) { + return v.hash() + i + l; + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test34(long l, MyValue2 v, int i) { + return test34_interp(l, v, i); + } + + @DontCompile + public void test34_verifier(boolean warmup) { + MyValue2 v = MyValue2.createInline(rI, true); + long result = test34(rL, v, rI); + Asserts.assertEQ(result, v.hashInterpreted() + rL + rI); + } + + @DontCompile + public long test35_interp(long l, MyValue1 v1, int i, MyValue2 v2) { + return v1.hash() + i + l + v2.hash(); + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test35(long l, MyValue1 v1, int i, MyValue2 v2) { + return test35_interp(l, v1, i, v2); + } + + @DontCompile + public void test35_verifier(boolean warmup) { + MyValue1 v1 = MyValue1.createDontInline(rI, rL); + MyValue2 v2 = MyValue2.createInline(rI, true); + long result = test35(rL, v1, rI, v2); + Asserts.assertEQ(result, v1.hashInterpreted() + rL + rI + v2.hashInterpreted()); + } + + // test that debug info at a call is correct + @DontCompile + public long test36_interp(MyValue2 v, boolean flag) { + if (flag) { + // uncommon trap + WHITE_BOX.deoptimizeMethod(tests.get("ValueTypeTestBench::test36")); + } + return v.hash(); + } + + @Test(failOn = ALLOC + STORE + TRAP) + public long test36(MyValue2 v, boolean flag, long l) { + return test36_interp(v, flag) + l; + } + + @DontCompile + public void test36_verifier(boolean warmup) { + MyValue2 v = 
MyValue2.createInline(rI, true); + long result = test36(v, false, rL); + Asserts.assertEQ(result, v.hashInterpreted() + rL); + if (!warmup) { + result = test36(v, true, rL); + Asserts.assertEQ(result, v.hashInterpreted() + rL); + } + } // ========== Test infrastructure ========== private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); + private static final int ValueTypePassFieldsAsArgsOn = 0x1; + private static final int ValueTypePassFieldsAsArgsOff = 0x2; + static final int AllFlags = ValueTypePassFieldsAsArgsOn | ValueTypePassFieldsAsArgsOff; + private static final boolean ValueTypePassFieldsAsArgs = (Boolean)WHITE_BOX.getVMFlag("ValueTypePassFieldsAsArgs"); private static final int COMP_LEVEL_ANY = -1; private static final int COMP_LEVEL_FULL_OPTIMIZATION = 4; private static final Hashtable tests = new Hashtable(); @@ -580,7 +806,8 @@ static { // Gather all test methods and put them in Hashtable for (Method m : ValueTypeTestBench.class.getDeclaredMethods()) { - if (m.isAnnotationPresent(Test.class)) { + Test[] annos = m.getAnnotationsByType(Test.class); + if (annos.length != 0) { tests.put("ValueTypeTestBench::" + m.getName(), m); } } @@ -588,15 +815,25 @@ public static void main(String[] args) throws Throwable { if (args.length == 0) { + ArrayList all_args = new ArrayList(List.of( + "-noverify", + "-XX:+UnlockDiagnosticVMOptions", "-Xbootclasspath/a:.", "-XX:+WhiteBoxAPI", + "-XX:-TieredCompilation", "-XX:-BackgroundCompilation", + "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+PrintCompilation", "-XX:+PrintIdeal", "-XX:+PrintOptoAssembly", + "-XX:CompileCommand=quiet", "-XX:CompileCommand=compileonly,compiler.valhalla.valuetypes.ValueTypeTestBench::*", + "-XX:CompileCommand=compileonly,compiler.valhalla.valuetypes.MyValue1::*", + "-XX:CompileCommand=compileonly,compiler.valhalla.valuetypes.MyValue2::*" + )); // Run tests in own process and verify output - OutputAnalyzer oa = ProcessTools.executeTestJvm("-noverify", - "-XX:+UnlockDiagnosticVMOptions", 
"-Xbootclasspath/a:.", "-XX:+WhiteBoxAPI", - "-XX:-TieredCompilation", "-XX:-BackgroundCompilation", - "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+PrintCompilation", "-XX:+PrintIdeal", "-XX:+PrintOptoAssembly", - "-XX:CompileCommand=quiet", "-XX:CompileCommand=compileonly,compiler.valhalla.valuetypes.ValueTypeTestBench::*", - "-XX:CompileCommand=compileonly,compiler.valhalla.valuetypes.MyValue1::*", - "-XX:CompileCommand=compileonly,compiler.valhalla.valuetypes.MyValue2::*", - ValueTypeTestBench.class.getName(), "run"); + all_args.add("-XX:+UnlockExperimentalVMOptions"); + if ((Boolean)WHITE_BOX.getVMFlag("ValueTypePassFieldsAsArgs")) { + all_args.add("-XX:+ValueTypePassFieldsAsArgs"); + } else { + all_args.add("-XX:-ValueTypePassFieldsAsArgs"); + } + all_args.add(ValueTypeTestBench.class.getName()); + all_args.add("run"); + OutputAnalyzer oa = ProcessTools.executeTestJvm(all_args.toArray(new String[0])); // If ideal graph printing is enabled/supported, verify output String output = oa.getOutput(); oa.shouldHaveExitValue(0); @@ -636,7 +873,18 @@ System.out.println("\nGraph for " + graph); } // Parse graph using regular expressions to determine if it contains forbidden nodes - Test anno = test.getAnnotation(Test.class); + Test[] annos = test.getAnnotationsByType(Test.class); + Test anno = null; + for (Test a : annos) { + if ((a.valid() & ValueTypePassFieldsAsArgsOn) != 0 && ValueTypePassFieldsAsArgs) { + assert anno == null; + anno = a; + } else if ((a.valid() & ValueTypePassFieldsAsArgsOff) != 0 && !ValueTypePassFieldsAsArgs) { + assert anno == null; + anno = a; + } + } + assert anno != null; String regexFail = anno.failOn(); if (!regexFail.isEmpty()) { Pattern pattern = Pattern.compile(regexFail.substring(0, regexFail.length()-1)); @@ -723,6 +971,7 @@ // Mark method as test @Retention(RetentionPolicy.RUNTIME) +@Repeatable(Tests.class) @interface Test { // Regular expression used to match forbidden IR nodes // in the C2 IR emitted for this test. 
@@ -730,6 +979,12 @@ // Regular expressions used to match and count IR nodes. String[] match() default { }; int[] matchCount() default { }; + int valid() default ValueTypeTestBench.AllFlags; +} + +@Retention(RetentionPolicy.RUNTIME) +@interface Tests { + Test[] value(); } // Force method inlining during compilation