--- old/./.hgtags 2019-01-24 17:46:27.546382712 +0000 +++ new/./.hgtags 2019-01-24 17:46:26.822350532 +0000 @@ -535,3 +535,4 @@ 50677f43ac3df9a8684222b8893543c60f3aa0bd jdk-13+2 de9fd809bb475401aad188eab2264226788aad81 jdk-12+26 642346a11059b9f283110dc301a24ed43b76a94e jdk-13+3 +266942398494aec1ccaae0f3ec9e34e20c2747f8 AArch64_support_start --- old/src/hotspot/cpu/aarch64/aarch64.ad 2019-01-24 17:46:29.754480850 +0000 +++ new/src/hotspot/cpu/aarch64/aarch64.ad 2019-01-24 17:46:29.042449204 +0000 @@ -1955,6 +1955,30 @@ } //============================================================================= +#ifndef PRODUCT +void MachVVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + st->print_cr("MachVVEPNode"); +} +#endif + +void MachVVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const +{ + // Unpack all value type args passed as oop and then jump to + // the verified entry point (skipping the unverified entry). + MacroAssembler _masm(&cbuf); + + __ unpack_value_args(ra_->C); + __ b(*_verified_entry); +} + +uint MachVVEPNode::size(PhaseRegAlloc* ra_) const +{ + return MachNode::size(ra_); // too many variables; just compute it the hard way +} + + +//============================================================================= #ifndef PRODUCT void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const @@ -2367,7 +2391,7 @@ { int gps = 0; int fps = 0; - const TypeTuple *domain = tf->domain(); + const TypeTuple *domain = tf->domain_cc(); int max = domain->cnt(); for (int i = TypeFunc::Parms; i < max; i++) { const Type *t = domain->field_at(i); @@ -7885,6 +7909,21 @@ ins_pipe(ialu_reg); %} +instruct castN2X(iRegLNoSp dst, iRegN src) %{ + match(Set dst (CastP2X src)); + + ins_cost(INSN_COST); + format %{ "mov $dst, $src\t# ptr -> long" %} + + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ mov(as_Register($dst$$reg), as_Register($src$$reg)); + } + %} + + ins_pipe(ialu_reg); +%} + instruct castP2X(iRegLNoSp dst, iRegP src) %{ match(Set dst (CastP2X src)); @@ -13398,9 +13437,9 @@ // ============================================================================ // clearing of an array -instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr) +instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr) %{ - match(Set dummy (ClearArray cnt base)); + match(Set dummy (ClearArray (Binary cnt base) val)); effect(USE_KILL cnt, USE_KILL base); ins_cost(4 * INSN_COST); @@ -13413,11 +13452,12 @@ ins_pipe(pipe_class_memory); %} -instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr) +instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr) %{ - predicate((u_int64_t)n->in(2)->get_long() + predicate((u_int64_t)n->in(3)->get_long() < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); - match(Set dummy (ClearArray cnt base)); + + match(Set dummy (ClearArray (Binary cnt base) val)); effect(USE_KILL base); ins_cost(4 * INSN_COST); --- old/src/hotspot/cpu/aarch64/abstractInterpreter_aarch64.cpp 2019-01-24 17:46:32.654609742 +0000 +++ new/src/hotspot/cpu/aarch64/abstractInterpreter_aarch64.cpp 2019-01-24 17:46:31.938577919 +0000 @@ -47,6 +47,7 @@ case T_DOUBLE : i = 8; break; case T_OBJECT : i = 9; break; case T_ARRAY : i = 9; break; + case T_VALUETYPE : i = 10; break; default : ShouldNotReachHere(); } assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, --- old/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp 2019-01-24 
17:46:34.918710366 +0000 +++ new/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp 2019-01-24 17:46:34.158676588 +0000 @@ -175,11 +175,12 @@ // Implementation of NewObjectArrayStub -NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info, bool is_value_type) { _klass_reg = klass_reg; _result = result; _length = length; _info = new CodeEmitInfo(info); + _is_value_type = is_value_type; } @@ -188,7 +189,13 @@ __ bind(_entry); assert(_length->as_register() == r19, "length must in r19,"); assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); - __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); + + if (_is_value_type) { + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_value_array_id))); + } else { + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); + } + ce->add_call_info_here(_info); ce->verify_oop_map(_info); assert(_result->as_register() == r0, "result must in r0"); @@ -196,16 +203,30 @@ } // Implementation of MonitorAccessStubs -MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info, CodeStub* throw_imse_stub, LIR_Opr scratch_reg) : MonitorAccessStub(obj_reg, lock_reg) { _info = new CodeEmitInfo(info); + _scratch_reg = scratch_reg; + _throw_imse_stub = throw_imse_stub; + if (_throw_imse_stub != NULL) { + assert(_scratch_reg != LIR_OprFact::illegalOpr, "must be"); + } } void MonitorEnterStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); + if (_throw_imse_stub != NULL) { + // When we come here, _obj_reg has already been checked to be non-null. + Register mark = _scratch_reg->as_register(); + __ ldr(mark, Address(_obj_reg->as_register(), oopDesc::mark_offset_in_bytes())); + __ andr(mark, mark, (u1) markOopDesc::always_locked_pattern); + __ cmp(mark, (u1) markOopDesc::always_locked_pattern); + __ br(Assembler::EQ, *_throw_imse_stub->entry()); + } + ce->store_parameter(_obj_reg->as_register(), 1); ce->store_parameter(_lock_reg->as_register(), 0); Runtime1::StubID enter_id; --- old/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp 2019-01-24 17:46:37.114807966 +0000 +++ new/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp 2019-01-24 17:46:36.406776500 +0000 @@ -45,7 +45,7 @@ Register reg2 = r_2->as_Register(); assert(reg2 == reg, "must be same register"); opr = as_long_opr(reg); - } else if (type == T_OBJECT || type == T_ARRAY) { + } else if (type == T_OBJECT || type == T_ARRAY || type == T_VALUETYPE) { opr = as_oop_opr(reg); } else if (type == T_METADATA) { opr = as_metadata_opr(reg); --- old/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2019-01-24 17:46:39.250902898 +0000 +++ new/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2019-01-24 17:46:38.538871254 +0000 @@ -558,6 +558,7 @@ break; } + case T_VALUETYPE: case T_OBJECT: { if (patch_code == lir_patch_none) { jobject2reg(c->as_jobject(), dest->as_register()); @@ -604,6 +605,7 @@ void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { LIR_Const* c = src->as_constant_ptr(); switch (c->type()) { + case T_VALUETYPE: case T_OBJECT: { if (!
c->as_jobject()) @@ -670,6 +672,7 @@ assert(c->as_jint() == 0, "should be"); insn = &Assembler::strw; break; + case T_VALUETYPE: case T_OBJECT: case T_ARRAY: assert(c->as_jobject() == 0, "should be"); @@ -710,13 +713,13 @@ return; } assert(src->is_single_cpu(), "must match"); - if (src->type() == T_OBJECT) { + if (src->type() == T_OBJECT || src->type() == T_VALUETYPE) { __ verify_oop(src->as_register()); } move_regs(src->as_register(), dest->as_register()); } else if (dest->is_double_cpu()) { - if (src->type() == T_OBJECT || src->type() == T_ARRAY) { + if (src->type() == T_OBJECT || src->type() == T_ARRAY || src->type() == T_VALUETYPE) { // Surprising to me but we can see move of a long to t_object __ verify_oop(src->as_register()); move_regs(src->as_register(), dest->as_register_lo()); @@ -744,7 +747,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { if (src->is_single_cpu()) { - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); __ verify_oop(src->as_register()); } else if (type == T_METADATA || type == T_DOUBLE) { @@ -782,7 +785,7 @@ return; } - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { __ verify_oop(src->as_register()); if (UseCompressedOops && !wide) { @@ -804,6 +807,7 @@ break; } + case T_VALUETYPE: // fall through case T_ARRAY: // fall through case T_OBJECT: // fall through if (UseCompressedOops && !wide) { @@ -857,7 +861,7 @@ assert(dest->is_register(), "should not call otherwise"); if (dest->is_single_cpu()) { - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); __ verify_oop(dest->as_register()); } else if (type == T_METADATA) { @@ -929,7 +933,7 @@ LIR_Address* addr = src->as_address_ptr(); LIR_Address* from_addr = src->as_address_ptr(); - if (addr->base()->type() == T_OBJECT) { + if (addr->base()->type() == T_OBJECT || addr->base()->type() == T_VALUETYPE) { __ verify_oop(addr->base()->as_pointer_register()); } @@ -953,6 +957,7 @@ break; } + case T_VALUETYPE: // fall through case T_ARRAY: // fall through case T_OBJECT: // fall through if (UseCompressedOops && !wide) { @@ -1007,7 +1012,7 @@ ShouldNotReachHere(); } - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { if (UseCompressedOops && !wide) { __ decode_heap_oop(dest->as_register()); } @@ -1210,7 +1215,7 @@ Register len = op->len()->as_register(); __ uxtw(len, len); - if (UseSlowPath || + if (UseSlowPath || op->type() == T_VALUETYPE || (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { __ b(*op->stub()->entry()); @@ -1928,10 +1933,10 @@ if (opr2->is_single_cpu()) { // cpu register - cpu register Register reg2 = opr2->as_register(); - if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) { + if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY || opr1->type() == T_VALUETYPE) { __ cmpoop(reg1, reg2); } else { - assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?"); + assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY && opr2->type() != T_VALUETYPE, "cmp int, oop?"); __ cmpw(reg1, reg2); } return; @@ -1958,6 +1963,7 @@ case 
T_ADDRESS: imm = opr2->as_constant_ptr()->as_jint(); break; + case T_VALUETYPE: case T_OBJECT: case T_ARRAY: jobject2reg(opr2->as_constant_ptr()->as_jobject(), rscratch1); @@ -2125,6 +2131,7 @@ } break; case T_LONG: + case T_VALUETYPE: case T_ADDRESS: case T_OBJECT: switch (code) { @@ -2161,6 +2168,7 @@ break; case T_LONG: case T_ADDRESS: + case T_VALUETYPE: case T_OBJECT: switch (code) { case lir_shl: __ lsl (dreg, lreg, count); break; @@ -2889,6 +2897,7 @@ case T_INT: case T_LONG: case T_OBJECT: + case T_VALUETYPE: type = 1; break; case T_FLOAT: @@ -3155,6 +3164,7 @@ xchg = &MacroAssembler::atomic_xchgal; add = &MacroAssembler::atomic_addal; break; + case T_VALUETYPE: case T_OBJECT: case T_ARRAY: if (UseCompressedOops) { --- old/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp 2019-01-24 17:46:41.551005118 +0000 +++ new/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp 2019-01-24 17:46:40.862974541 +0000 @@ -35,6 +35,7 @@ #include "ci/ciArray.hpp" #include "ci/ciObjArrayKlass.hpp" #include "ci/ciTypeArrayKlass.hpp" +#include "ci/ciValueKlass.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" #include "vmreg_aarch64.inline.hpp" @@ -332,7 +333,7 @@ LIR_Opr lock = new_register(T_INT); // Need a scratch register for biased locking LIR_Opr scratch = LIR_OprFact::illegalOpr; - if (UseBiasedLocking) { + if (UseBiasedLocking || x->maybe_valuetype()) { scratch = new_register(T_INT); } @@ -340,11 +341,17 @@ if (x->needs_null_check()) { info_for_exception = state_for(x); } + + CodeStub* throw_imse_stub = + x->maybe_valuetype() ? + new SimpleExceptionStub(Runtime1::throw_illegal_monitor_state_exception_id, LIR_OprFact::illegalOpr, state_for(x)) : + NULL; + // this CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expect object to be unlocked) CodeEmitInfo* info = state_for(x, x->state(), true); monitor_enter(obj.result(), lock, syncTempOpr(), scratch, - x->monitor_no(), info_for_exception, info); + x->monitor_no(), info_for_exception, info, throw_imse_stub); } @@ -1153,6 +1160,22 @@ __ move(reg, result); } +void LIRGenerator::do_NewValueTypeInstance (NewValueTypeInstance* x) { + // Mapping to do_NewInstance (same code) + CodeEmitInfo* info = state_for(x, x->state()); + x->set_to_object_type(); + LIR_Opr reg = result_register_for(x->type()); + new_instance(reg, x->klass(), x->is_unresolved(), + FrameMap::r2_oop_opr, + FrameMap::r5_oop_opr, + FrameMap::r4_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::r3_metadata_opr, info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); + +} + void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { CodeEmitInfo* info = state_for(x, x->state()); @@ -1198,13 +1221,20 @@ length.load_item_force(FrameMap::r19_opr); LIR_Opr len = length.result(); - CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + // DMS CHECK: Should we allocate slow path after BAILOUT? 
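// Note: the stub is created with is_value_type == false; Runtime1::new_object_array also covers value arrays (see the c1_Runtime1 comment below), while the fast path selects between T_VALUETYPE and T_OBJECT once the element klass is known.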
+ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info, false); + ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); if (obj == ciEnv::unloaded_ciobjarrayklass()) { BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); } klass2reg_with_patching(klass_reg, obj, patching_info); - __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + if (obj->is_value_array_klass()) { + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_VALUETYPE, klass_reg, slow_path); + } else { + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + } LIR_Opr result = rlock_result(x); __ move(reg, result); @@ -1298,10 +1328,13 @@ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { tmp3 = new_register(objectType); } + + __ checkcast(reg, obj.result(), x->klass(), new_register(objectType), new_register(objectType), tmp3, x->direct_compare(), info_for_exception, patching_info, stub, - x->profiled_method(), x->profiled_bci()); + x->profiled_method(), x->profiled_bci(), x->is_never_null()); + } void LIRGenerator::do_InstanceOf(InstanceOf* x) { --- old/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp 2019-01-24 17:46:43.711101115 +0000 +++ new/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp 2019-01-24 17:46:43.027070716 +0000 @@ -83,6 +83,12 @@ ldr(hdr, Address(obj, hdr_offset)); // and mark it as unlocked orr(hdr, hdr, markOopDesc::unlocked_value); + + if (EnableValhalla && !UseBiasedLocking) { + // Mask always_locked bit such that we go to the slow path if object is a value type + andr(hdr, hdr, ~markOopDesc::biased_lock_bit_in_place); + } + // save unlocked object header into the displaced header location on the stack str(hdr, Address(disp_hdr, 0)); // test if object header is still the same (i.e. unlocked), and if so, store the --- old/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp 2019-01-24 17:46:45.851196222 +0000 +++ new/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp 2019-01-24 17:46:45.159165468 +0000 @@ -772,6 +772,7 @@ case new_type_array_id: case new_object_array_id: + case new_value_array_id: { Register length = r19; // Incoming Register klass = r3; // Incoming @@ -779,9 +780,13 @@ if (id == new_type_array_id) { __ set_info("new_type_array", dont_gc_arguments); - } else { + } + else if (id == new_object_array_id) { __ set_info("new_object_array", dont_gc_arguments); } + else { + __ set_info("new_value_array", dont_gc_arguments); + } #ifdef ASSERT // assert object type is really an array of the proper kind @@ -790,9 +795,14 @@ Register t0 = obj; __ ldrw(t0, Address(klass, Klass::layout_helper_offset())); __ asrw(t0, t0, Klass::_lh_array_tag_shift); - int tag = ((id == new_type_array_id) - ? 
Klass::_lh_array_tag_type_value - : Klass::_lh_array_tag_obj_value); + + int tag = 0; + switch (id) { + case new_type_array_id: tag = Klass::_lh_array_tag_type_value; break; + case new_object_array_id: tag = Klass::_lh_array_tag_obj_value; break; + case new_value_array_id: tag = Klass::_lh_array_tag_vt_value; break; + default: ShouldNotReachHere(); + } __ mov(rscratch1, tag); __ cmpw(t0, rscratch1); __ br(Assembler::EQ, ok); @@ -852,6 +862,7 @@ if (id == new_type_array_id) { call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); } else { + // Runtime1::new_object_array handles both object and value arrays call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); } @@ -926,11 +937,17 @@ break; case throw_incompatible_class_change_error_id: - { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + { StubFrame f(sasm, "throw_incompatible_class_change_exception", dont_gc_arguments); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); } break; + case throw_illegal_monitor_state_exception_id: + { StubFrame f(sasm, "throw_illegal_monitor_state_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_illegal_monitor_state_exception), false); + } + break; + case slow_subtype_check_id: { // Typical calling sequence: @@ -1122,8 +1139,7 @@ } break; - - default: + default: // DMS CHECK: we come here with id:0 and id:32 during VM intialization, should it be fixed? { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); __ mov(r0, (int)id); __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0); @@ -1132,6 +1148,8 @@ break; } } + + return oop_maps; } --- old/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp 2019-01-24 17:46:48.007292038 +0000 +++ new/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp 2019-01-24 17:46:47.315261284 +0000 @@ -40,7 +40,7 @@ define_pd_global(bool, PreferInterpreterNativeStubs, false); define_pd_global(bool, ProfileTraps, false); define_pd_global(bool, UseOnStackReplacement, true ); -define_pd_global(bool, TieredCompilation, false); +define_pd_global(bool, TieredCompilation, true); #ifdef BUILTIN_SIM // We compile very aggressively with the builtin simulator because // doing so greatly reduces run times and tests more code. 
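The value-type locking and acmp changes above and below (MonitorEnterStub::emit_code, C1's lock_object, the interpreter's lock_object and if_acmp) all hinge on the same test: a value type's mark word permanently carries the "always locked" bit pattern, so any attempt to lock it, or to compare it with a plain pointer acmp, is diverted to a slow path. A minimal standalone sketch of the intended predicate, with placeholder constants (the authoritative names are markOopDesc::always_locked_pattern and the lock-bit masks in markOop.hpp; the numeric values here are assumptions for illustration only):

#include <cstdint>

namespace sketch {
  // Assumed values, not HotSpot's real encoding.
  constexpr uintptr_t lock_mask_in_place    = 0x7; // low lock bits of the mark word
  constexpr uintptr_t always_locked_pattern = 0x5; // pattern permanently present in a value type's header

  // The check the stubs intend to emit: a bitwise AND over the lock bits
  // (not a logical &&), then an equality compare against the pattern.
  inline bool is_always_locked(uintptr_t mark) {
    return (mark & lock_mask_in_place) == always_locked_pattern;
  }
}

When the predicate is true, monitorenter raises IllegalMonitorStateException, and if_acmpeq/ne treats the operand as not reference-equal rather than comparing raw pointers.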
--- old/src/hotspot/cpu/aarch64/frame_aarch64.cpp 2019-01-24 17:46:50.131386431 +0000 +++ new/src/hotspot/cpu/aarch64/frame_aarch64.cpp 2019-01-24 17:46:49.423354967 +0000 @@ -593,6 +593,7 @@ } switch (type) { + case T_VALUETYPE : case T_OBJECT : case T_ARRAY : { oop obj; --- old/src/hotspot/cpu/aarch64/globals_aarch64.hpp 2019-01-24 17:46:52.275481712 +0000 +++ new/src/hotspot/cpu/aarch64/globals_aarch64.hpp 2019-01-24 17:46:51.587451137 +0000 @@ -68,6 +68,7 @@ define_pd_global(bool, PreserveFramePointer, false); define_pd_global(bool, ValueTypePassFieldsAsArgs, false); +define_pd_global(bool, ValueTypeReturnedAsFields, false); // GC Ergo Flags define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread --- old/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp 2019-01-24 17:46:54.371574859 +0000 +++ new/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp 2019-01-24 17:46:53.683544284 +0000 @@ -35,6 +35,7 @@ #include "oops/markOop.hpp" #include "oops/method.hpp" #include "oops/methodData.hpp" +#include "oops/valueKlass.hpp" #include "prims/jvmtiExport.hpp" #include "prims/jvmtiThreadState.hpp" #include "runtime/basicLock.hpp" @@ -672,6 +673,8 @@ bind(no_reserved_zone_enabling); } + + // DMS CHECK: ValueTypeReturnedAsFields support should be here // remove frame anchor leave(); // If we're returning to interpreted code we will shortly be @@ -725,6 +728,11 @@ // Save (object->mark() | 1) into BasicLock's displaced header str(swap_reg, Address(lock_reg, mark_offset)); + if (EnableValhalla && !UseBiasedLocking) { // DMS CHECK + // For slow path is_always_locked, using biased, which is never natural for !UseBiasLocking + andr(swap_reg, swap_reg, ~markOopDesc::biased_lock_bit_in_place); + } + assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); --- old/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp 2019-01-24 17:46:56.583673159 +0000 +++ new/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp 2019-01-24 17:46:55.871641518 +0000 @@ -256,6 +256,10 @@ } } +void InterpreterRuntime::SignatureHandlerGenerator::pass_valuetype() { + pass_object(); +} + void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { // generate code to handle arguments iterate(fingerprint); @@ -349,6 +353,11 @@ } } + virtual void pass_valuetype() { + // values are handled with oops, like objects + pass_object(); + } + virtual void pass_float() { jint from_obj = *(jint*)(_from+Interpreter::local_offset_in_bytes(0)); --- old/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp 2019-01-24 17:46:58.939777858 +0000 +++ new/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp 2019-01-24 17:46:58.071739285 +0000 @@ -44,6 +44,7 @@ void pass_float(); void pass_double(); void pass_object(); + void pass_valuetype(); public: // Creation --- old/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp 2019-01-24 17:47:01.247880422 +0000 +++ new/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp 2019-01-24 17:47:00.387842205 +0000 @@ -1302,7 +1302,11 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { - if (!VerifyOops) return; + if (!VerifyOops || VerifyAdapterSharing) { + // Below address of the code string confuses VerifyAdapterSharing + // because it may differ between otherwise equivalent adapters. 
+ return; + } // Pass register number to verify_oop_subroutine const char* b = NULL; @@ -1332,7 +1336,11 @@ } void MacroAssembler::verify_oop_addr(Address addr, const char* s) { - if (!VerifyOops) return; + if (!VerifyOops || VerifyAdapterSharing) { + // Below address of the code string confuses VerifyAdapterSharing + // because it may differ between otherwise equivalent adapters. + return; + } const char* b = NULL; { @@ -1435,6 +1443,10 @@ call_VM_leaf_base(entry_point, 3); } +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { pass_arg0(this, arg_0); MacroAssembler::call_VM_leaf_base(entry_point, 1); @@ -1484,6 +1496,39 @@ } } +void MacroAssembler::test_klass_is_value(Register klass, Register temp_reg, Label& is_value) { + ldrw(temp_reg, Address(klass, Klass::access_flags_offset())); + andr(temp_reg, temp_reg, JVM_ACC_VALUE); + cbnz(temp_reg, is_value); +} + +void MacroAssembler::test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable) { + (void) temp_reg; // keep signature uniform with x86 + tbnz(flags, ConstantPoolCacheEntry::is_flattenable_field_shift, is_flattenable); +} + +void MacroAssembler::test_field_is_not_flattenable(Register flags, Register temp_reg, Label& not_flattenable) { + (void) temp_reg; // keep signature uniform with x86 + tbz(flags, ConstantPoolCacheEntry::is_flattenable_field_shift, not_flattenable); +} + +void MacroAssembler::test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened) { + (void) temp_reg; // keep signature uniform with x86 + tbnz(flags, ConstantPoolCacheEntry::is_flattened_field_shift, is_flattened); +} + +void MacroAssembler::test_flat_array_klass(Register klass, Register temp_reg, Label& is_flattened) { + ldrw(temp_reg, Address(klass, Klass::layout_helper_offset())); + asrw(temp_reg, temp_reg, Klass::_lh_array_tag_shift); + cmpw(temp_reg, Klass::_lh_array_tag_vt_value); + br(Assembler::EQ, is_flattened); +} + +void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg, Label& is_flattened) { + load_klass(temp_reg, oop); + test_flat_array_klass(temp_reg, temp_reg, is_flattened); +} + // MacroAssembler protected routines needed to implement // public methods @@ -5850,3 +5895,10 @@ pop(saved_regs, sp); } + +// DMS TODO ValueType MachVVEPNode support +void MacroAssembler::unpack_value_args(Compile* C) { + // Not implemented + guarantee(false, "Support for MachVVEPNode is not implemented"); +} + --- old/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp 2019-01-24 17:47:03.803994005 +0000 +++ new/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp 2019-01-24 17:47:03.067961299 +0000 @@ -27,6 +27,8 @@ #define CPU_AARCH64_VM_MACROASSEMBLER_AARCH64_HPP #include "asm/assembler.hpp" +#include "runtime/signature.hpp" + // MacroAssembler extends Assembler by frequently used macros. 
// @@ -584,6 +586,18 @@ static bool needs_explicit_null_check(intptr_t offset); static bool uses_implicit_null_check(void* address); + void test_klass_is_value(Register klass, Register temp_reg, Label& is_value); + + void test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable); + void test_field_is_not_flattenable(Register flags, Register temp_reg, Label& notFlattenable); + void test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened); + + // Check klass/oops is flat value type array (oop->_klass->_layout_helper & vt_bit) + void test_flat_array_klass(Register klass, Register temp_reg, Label& is_flat_array); + void test_flat_array_oop(Register oop, Register temp_reg, Label& is_flat_array); + + + static address target_addr_for_insn(address insn_addr, unsigned insn); static address target_addr_for_insn(address insn_addr) { unsigned insn = *(unsigned*)insn_addr; @@ -1141,6 +1155,9 @@ void adrp(Register reg1, const Address &dest, unsigned long &byte_offset); + // Support for MachVVEPNode + void unpack_value_args(Compile* C); + void tableswitch(Register index, jint lowbound, jint highbound, Label &jumptable, Label &jumptable_end, int stride = 1) { adr(rscratch1, jumptable); --- old/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp 2019-01-24 17:47:06.116096744 +0000 +++ new/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp 2019-01-24 17:47:05.396064749 +0000 @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "classfile/symbolTable.hpp" #include "code/debugInfoRec.hpp" #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" @@ -289,6 +290,7 @@ case T_OBJECT: case T_ARRAY: case T_ADDRESS: + case T_VALUETYPE: if (int_args < Argument::n_int_register_parameters_j) { regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); } else { @@ -322,6 +324,89 @@ return align_up(stk_args, 2); } +const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1; +const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; + +int SharedRuntime::java_return_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed) { + + // Create the mapping between argument positions and + // registers. + static const Register INT_ArgReg[java_return_convention_max_int] = { + j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7 + }; + static const FloatRegister FP_ArgReg[java_return_convention_max_float] = { + j_farg0, j_farg1, j_farg2, j_farg3, + j_farg4, j_farg5, j_farg6, j_farg7 + }; + + + uint int_args = 0; + uint fp_args = 0; + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_int_register_parameters_j) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + int_args ++; + } else { + // Should we have gurantee here? + return -1; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_LONG: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + // Should T_METADATA be added to java_calling_convention as well ? 
+ case T_METADATA: + case T_VALUETYPE: + if (int_args < Argument::n_int_register_parameters_j) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + int_args ++; + } else { + return -1; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + fp_args ++; + } else { + return -1; + } + break; + case T_DOUBLE: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); + fp_args ++; + } else { + return -1; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return int_args + fp_args; +} + // Patch the callers callsite with entry to compiled code if it exists. static void patch_callers_callsite(MacroAssembler *masm) { Label L; @@ -352,12 +437,23 @@ __ bind(L); } +// For each value type argument, sig includes the list of fields of +// the value type. This utility function computes the number of +// arguments for the call if value types are passed by reference (the +// calling convention the interpreter expects). +static int compute_total_args_passed_int(const GrowableArray* sig_extended) { + guarantee(ValueTypePassFieldsAsArgs == false, "Support for ValValueTypePassFieldsAsArgs = true is not implemented"); + + int total_args_passed = 0; + total_args_passed = sig_extended->length(); + return total_args_passed; +} + static void gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig_extended, const VMRegPair *regs, Label& skip_fixup) { + // Before we get into the guts of the C2I adapter, see if we should be here // at all. We've come from compiled code and are attempting to jump to the // interpreter, which means the caller made a static call to get here @@ -372,20 +468,21 @@ // Since all args are passed on the stack, total_args_passed * // Interpreter::stackElementSize is the space we need. + int total_args_passed = compute_total_args_passed_int(sig_extended); int extraspace = total_args_passed * Interpreter::stackElementSize; __ mov(r13, sp); // stack is aligned, keep it that way - extraspace = align_up(extraspace, 2*wordSize); - + extraspace = align_up(extraspace, 2 * wordSize); if (extraspace) __ sub(sp, sp, extraspace); // Now write the args into the outgoing interpreter space for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + BasicType bt = sig_extended->at(i)._bt; + if (bt == T_VOID) { + //DMS TODO assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); continue; } @@ -414,9 +511,7 @@ } if (r_1->is_stack()) { // memory to memory use rscratch1 - int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size - + extraspace - + words_pushed * wordSize); + int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace + words_pushed * wordSize); if (!r_2->is_valid()) { // sign extend?? 
__ ldrw(rscratch1, Address(sp, ld_off)); @@ -428,7 +523,7 @@ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + if ( bt == T_LONG || bt == T_DOUBLE) { // ld_off == LSW, ld_off+wordSize == MSW // st_off == MSW, next_off == LSW __ str(rscratch1, Address(sp, next_off)); @@ -450,7 +545,7 @@ } else { // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + if ( bt == T_LONG || bt == T_DOUBLE) { // long/double in gpr #ifdef ASSERT // Overwrite the unused slot with known junk @@ -486,11 +581,11 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig, const VMRegPair *regs) { + // Note: r13 contains the senderSP on entry. We must preserve it since // we may do a i2c -> c2i transition if we lose a race where compiled // code goes non-entrant while we get args ready. @@ -571,10 +666,13 @@ } #endif // INCLUDE_JVMCI + int total_args_passed = compute_total_args_passed_int(sig); + // Now generate the shuffle code. for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + BasicType bt = sig->at(i)._bt; + if (bt == T_VOID) { + //DMS TODO: assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); continue; } @@ -583,7 +681,7 @@ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); // Load in argument order going down. - int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize; + int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; // Point to interpreter value (vs. tag) int next_off = ld_off - Interpreter::stackElementSize; // @@ -614,8 +712,7 @@ // are accessed as negative so LSW is at LOW address // ld_off is MSW so get LSW - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off; __ ldr(rscratch2, Address(esp, offset)); // st_off is LSW (i.e. reg.first()) __ str(rscratch2, Address(sp, st_off)); @@ -630,8 +727,7 @@ // So we must adjust where to pick up the data to match the // interpreter. - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? 
next_off : ld_off; // this can be a misaligned move __ ldr(r, Address(esp, offset)); @@ -730,11 +826,14 @@ // --------------------------------------------------------------- AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { + int comp_args_on_stack_cc, + const GrowableArray* sig, + const VMRegPair* regs, + const GrowableArray* sig_cc, + const VMRegPair* regs_cc, + AdapterFingerPrint* fingerprint, + AdapterBlob*& new_adapter) { address i2c_entry = __ pc(); #ifdef BUILTIN_SIM char *name = NULL; @@ -750,7 +849,7 @@ sim->notifyCompile(name, i2c_entry); } #endif - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + gen_i2c_adapter(masm, comp_args_on_stack_cc, sig_cc, regs_cc); address c2i_unverified_entry = __ pc(); Label skip_fixup; @@ -790,6 +889,7 @@ } address c2i_entry = __ pc(); + address c2i_value_entry = c2i_entry; #ifdef BUILTIN_SIM if (name) { @@ -800,10 +900,20 @@ } #endif - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup); __ flush(); - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); + + OopMapSet* oop_maps = NULL; + + int frame_complete = CodeOffsets::frame_never_safe; + int frame_size_in_words = 0; + + // The c2i adapter might safepoint and trigger a GC. The caller must make sure that + // the GC knows about the location of oop argument locations passed to the c2i adapter. + bool caller_must_gc_arguments = (regs != regs_cc); + new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_value_entry, c2i_unverified_entry); } int SharedRuntime::c_calling_convention(const BasicType *sig_bt, @@ -3194,3 +3304,101 @@ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); } #endif // COMPILER2_OR_JVMCI + +BufferedValueTypeBlob* SharedRuntime::generate_buffered_value_type_adapter(const ValueKlass* vk) { + BufferBlob* buf = BufferBlob::create("value types pack/unpack", 16 * K); + CodeBuffer buffer(buf); + short buffer_locs[20]; + buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, + sizeof(buffer_locs)/sizeof(relocInfo)); + + MacroAssembler _masm(&buffer); + MacroAssembler* masm = &_masm; + + const Array* sig_vk = vk->extended_sig(); + const Array* regs = vk->return_regs(); + + int pack_fields_off = __ offset(); + + int j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_VALUETYPE) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + VMReg r_2 = pair.second(); + Address to(r0, off); + if (bt == T_FLOAT) { + __ strs(r_1->as_FloatRegister(), to); + } else if (bt == T_DOUBLE) { + __ strd(r_1->as_FloatRegister(), to); + } else if (bt == T_OBJECT || bt == T_ARRAY) { + __ lea(r_1->as_Register(), to); + } else { + assert(is_java_primitive(bt), "unexpected basic type"); + size_t size_in_bytes = type2aelembytes(bt); + __ store_sized_value(to, r_1->as_Register(), size_in_bytes); + } + j++; + } + assert(j == regs->length(), "missed a field?"); 
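// The loop above pairs each entry of ValueKlass::extended_sig() with a register from ValueKlass::return_regs(): T_VALUETYPE entries are markers and are skipped, a T_VOID entry stands for the second half of the preceding long/double and only advances the register index j, and the final assert checks that every register pair was consumed. The unpack_fields code that follows walks the same signature in the opposite direction.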
+ + __ ret(r0); + + int unpack_fields_off = __ offset(); + + j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_VALUETYPE) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + VMReg r_2 = pair.second(); + Address from(r0, off); + if (bt == T_FLOAT) { + __ ldrs(r_1->as_FloatRegister(), from); + } else if (bt == T_DOUBLE) { + __ ldrd(r_1->as_FloatRegister(), from); + } else if (bt == T_OBJECT || bt == T_ARRAY) { + __ lea(r_1->as_Register(), from); + } else { + assert(is_java_primitive(bt), "unexpected basic type"); + size_t size_in_bytes = type2aelembytes(bt); + __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); + } + j++; + } + assert(j == regs->length(), "missed a field?"); + + // DMS CHECK: + if (StressValueTypeReturnedAsFields) { + __ load_klass(r0, r0); + __ orr(r0, r0, 1); + } + + __ ret(r0); + + __ flush(); + + return BufferedValueTypeBlob::create(&buffer, pack_fields_off, unpack_fields_off); +} --- old/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp 2019-01-24 17:47:08.784215301 +0000 +++ new/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp 2019-01-24 17:47:07.824172642 +0000 @@ -316,7 +316,7 @@ return_address = __ pc(); // store result depending on type (everything that is not - // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + // T_OBJECT, T_VALUETYPE, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) // n.b. this assumes Java returns an integral result in r0 // and a floating result in j_farg0 __ ldr(j_rarg2, result); @@ -324,6 +324,8 @@ __ ldr(j_rarg1, result_type); __ cmp(j_rarg1, (u1)T_OBJECT); __ br(Assembler::EQ, is_long); + __ cmp(j_rarg1, (u1)T_VALUETYPE); + __ br(Assembler::EQ, is_long); __ cmp(j_rarg1, (u1)T_LONG); __ br(Assembler::EQ, is_long); __ cmp(j_rarg1, (u1)T_FLOAT); --- old/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp 2019-01-24 17:47:11.244324614 +0000 +++ new/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp 2019-01-24 17:47:10.528292797 +0000 @@ -566,6 +566,7 @@ case T_VOID : /* nothing to do */ break; case T_FLOAT : /* nothing to do */ break; case T_DOUBLE : /* nothing to do */ break; + case T_VALUETYPE: // fall through (value types are handled with oops) case T_OBJECT : // retrieve result from frame __ ldr(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); --- old/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp 2019-01-24 17:47:13.532426282 +0000 +++ new/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp 2019-01-24 17:47:12.816394466 +0000 @@ -170,6 +170,7 @@ Label L_patch_done; switch (bc) { + case Bytecodes::_fast_qputfield: case Bytecodes::_fast_aputfield: case Bytecodes::_fast_bputfield: case Bytecodes::_fast_zputfield: @@ -808,11 +809,21 @@ // r0: array // r1: index index_check(r0, r1); // leaves index in r1, kills rscratch1 - __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); - do_oop_load(_masm, - Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), - r0, - IS_ARRAY); + if (EnableValhalla && ValueArrayFlatten) { + Label is_flat_array, done; + + __ test_flat_array_oop(r0, r10 /*temp*/, is_flat_array); + __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY); 
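// Non-flat arrays take the ordinary oop load above; a flat value array has no element oops to hand out, so the is_flat_array path below calls InterpreterRuntime::value_array_load, which returns a buffered copy of the element in r0.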
+ + __ b(done); + __ bind(is_flat_array); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::value_array_load), r0, r1); + __ bind(done); + } else { + __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY); + } } void TemplateTable::baload() @@ -1109,17 +1120,26 @@ Address element_address(r3, r4, Address::uxtw(LogBytesPerHeapOop)); index_check(r3, r2); // kills r1 - __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + + // DMS CHECK: what does line below do? + __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); // do array store check - check for NULL value first __ cbz(r0, is_null); + // Load array klass to r1, check if it is flat and bail out to ususal way + Label is_flat_array; + if (ValueArrayFlatten) { + __ load_klass(r1, r3); + __ test_flat_array_klass(r1, r10 /*temp*/, is_flat_array); + } + // Move subklass into r1 __ load_klass(r1, r0); // Move superklass into r0 __ load_klass(r0, r3); __ ldr(r0, Address(r0, - ObjArrayKlass::element_klass_offset())); + ObjArrayKlass::element_klass_offset())); // Compress array + index*oopSize + 12 into a single register. Frees r2. // Generate subtype check. Blows r2, r5 @@ -1143,8 +1163,64 @@ __ bind(is_null); __ profile_null_seen(r2); + if (EnableValhalla) { + Label is_null_into_value_array_npe, store_null; + + __ load_klass(r0, r3); + // No way to store null in flat array + __ test_flat_array_klass(r0, r1, is_null_into_value_array_npe); + + // Use case for storing values in objArray where element_klass is specifically + // a value type because they could not be flattened "for reasons", + // these need to have the same semantics as flat arrays, i.e. NPE + __ ldr(r0, Address(r0, ObjArrayKlass::element_klass_offset())); + __ test_klass_is_value(r0, r1, is_null_into_value_array_npe); + __ b(store_null); + + __ bind(is_null_into_value_array_npe); + __ b(ExternalAddress(Interpreter::_throw_NullPointerException_entry)); + + __ bind(store_null); + } + // Store a NULL do_oop_store(_masm, element_address, noreg, IS_ARRAY); + __ b(done); + + + if (EnableValhalla) { + // r0 - value, r2 - index, r3 - array. r1 - loaded array klass + // store non-null value + __ bind(is_flat_array); + + // Simplistic type check... + Label is_type_ok; + + // Profile the not-null value's klass. + // Load value class + __ load_klass(r10, r0); + __ profile_typecheck(r2, r1, r0); // blows r2, and r0 + + // flat value array needs exact type match + // is "r10 == r0" (value subclass == array element superclass) + + // Move element klass into r0 + __ ldr(r0, Address(r1, ArrayKlass::element_klass_offset())); + __ cmp(r0, r10); + __ br(Assembler::EQ, is_type_ok); + + __ profile_typecheck_failed(r2); + __ b(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry)); + __ bind(is_type_ok); + + // DMS CHECK: Reload from TOS to be safe, + // DMS CHECK: Because of profile_typecheck that blows r2 and r0. Should we really do it? 
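// InterpreterRuntime::value_array_store(value, array, index) writes the value's fields into the flattened element, so no oop store is performed for a flat array; the operands are re-read from the expression stack below because the profiling code above clobbers r0 and r2.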
+ __ ldr(r1, at_tos()); // value + __ mov(r2, r3); // array + __ ldr(r3, at_tos_p1()); // index + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::value_array_store), r1, r2, r3); + } + // Pop stack arguments __ bind(done); @@ -2025,10 +2101,25 @@ { transition(atos, vtos); // assume branch is more often taken than not (loops use backward branches) - Label not_taken; + Label taken, not_taken; __ pop_ptr(r1); __ cmpoop(r1, r0); + + if (EnableValhalla) { + guarantee(UsePointerPerturbation == false, "UsePointerPerturbation is not implemented"); + + __ br(Assembler::NE, (cc == not_equal) ? taken : not_taken); + __ cbz(r1, (cc == equal) ? taken : not_taken); + __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes())); + // DMS CHECK: Is code below correct? + __ andr(r2, r2, markOopDesc::always_locked_pattern && 0xF); + __ cmp(r2, (u1) markOopDesc::always_locked_pattern); + cc = (cc == equal) ? not_equal : equal; + } + + __ br(j_not(cc), not_taken); + __ bind(taken); branch(false, false); __ bind(not_taken); __ profile_not_taken_branch(r0); @@ -2283,7 +2374,7 @@ __ narrow(r0); } - __ remove_activation(state); + __ remove_activation(state); __ ret(lr); } @@ -2497,8 +2588,7 @@ // x86 uses a shift and mask or wings it with a shift plus assert // the mask is not needed. aarch64 just uses bitfield extract - __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift, - ConstantPoolCacheEntry::tos_state_bits); + __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits); assert(btos == 0, "change code, btos != 0"); __ cbnz(flags, notByte); @@ -2533,12 +2623,68 @@ __ cmp(flags, (u1)atos); __ br(Assembler::NE, notObj); // atos - do_oop_load(_masm, field, r0, IN_HEAP); - __ push(atos); - if (rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + if (!EnableValhalla) { + do_oop_load(_masm, field, r0, IN_HEAP); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + } + __ b(Done); + } else { // Valhalla + + if (is_static) { + __ load_heap_oop(r0, field); + Label isFlattenable, isUninitialized; + // Issue below if the static field has not been initialized yet + __ test_field_is_flattenable(raw_flags, r10, isFlattenable); + // Not flattenable case + __ push(atos); + __ b(Done); + // Flattenable case, must not return null even if uninitialized + __ bind(isFlattenable); + __ cbz(r0, isUninitialized); + __ push(atos); + __ b(Done); + __ bind(isUninitialized); + __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_static_value_field), obj, raw_flags); + __ verify_oop(r0); + __ push(atos); + __ b(Done); + } else { + Label isFlattened, isInitialized, isFlattenable, rewriteFlattenable; + __ test_field_is_flattenable(raw_flags, r10, isFlattenable); + // Non-flattenable field case, also covers the object case + __ load_heap_oop(r0, field); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + } + __ b(Done); + __ bind(isFlattenable); + __ test_field_is_flattened(raw_flags, r10, isFlattened); + // Non-flattened field case + __ load_heap_oop(r0, field); + __ cbnz(r0, isInitialized); + __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field), obj, raw_flags); + __ bind(isInitialized); + __ verify_oop(r0); + __ push(atos); + __ 
b(rewriteFlattenable); + __ bind(isFlattened); + __ ldr(r10, Address(cache, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f1_offset()))); + __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flattened_field), obj, raw_flags, r10); + __ verify_oop(r0); + __ push(atos); + __ bind(rewriteFlattenable); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_qgetfield, bc, r1); + } + __ b(Done); + } } - __ b(Done); __ bind(notObj); __ cmp(flags, (u1)itos); @@ -2708,6 +2854,7 @@ const Register obj = r2; const Register off = r19; const Register flags = r0; + const Register flags2 = r6; const Register bc = r4; resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); @@ -2730,6 +2877,8 @@ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; + __ mov(flags2, flags); + // x86 uses a shift and mask or wings it with a shift plus assert // the mask is not needed. aarch64 just uses bitfield extract __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits); @@ -2772,14 +2921,56 @@ // atos { - __ pop(atos); - if (!is_static) pop_and_check_object(obj); - // Store into the field - do_oop_store(_masm, field, r0, IN_HEAP); - if (rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no); - } - __ b(Done); + if (!EnableValhalla) { + __ pop(atos); + if (!is_static) pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, r0, IN_HEAP); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no); + } + __ b(Done); + } else { // Valhalla + + __ pop(atos); + if (is_static) { + Label notFlattenable; + __ test_field_is_not_flattenable(flags2, r10, notFlattenable); + __ null_check(r0); + __ bind(notFlattenable); + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(Done); + } else { + Label isFlattenable, isFlattened, notBuffered, notBuffered2, rewriteNotFlattenable, rewriteFlattenable; + __ test_field_is_flattenable(flags2, r10, isFlattenable); + // Not flattenable case, covers not flattenable values and objects + pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, r0, IN_HEAP); + __ bind(rewriteNotFlattenable); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, r19, true, byte_no); + } + __ b(Done); + // Implementation of the flattenable semantic + __ bind(isFlattenable); + __ null_check(r0); + __ test_field_is_flattened(flags2, r10, isFlattened); + // Not flattened case + pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(rewriteFlattenable); + __ bind(isFlattened); + pop_and_check_object(obj); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flattened_value), r0, off, obj); + __ bind(rewriteFlattenable); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_qputfield, bc, r19, true, byte_no); + } + __ b(Done); + } + } // Valhalla } __ bind(notObj); @@ -2919,6 +3110,7 @@ // to do it for every data type, we use the saved values as the // jvalue object. 
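// _fast_qputfield is listed alongside _fast_aputfield in both switches below: a flattenable (Q-type) field write keeps its value as an oop on TOS, so it is saved and restored exactly like an aputfield around the JVMTI field-modification callout.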
switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_qputfield: //fall through case Bytecodes::_fast_aputfield: __ push_ptr(r0); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -2945,6 +3137,7 @@ r19, c_rarg2, c_rarg3); switch (bytecode()) { // restore tos values + case Bytecodes::_fast_qputfield: //fall through case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -2995,6 +3188,19 @@ // access field switch (bytecode()) { + case Bytecodes::_fast_qputfield: //fall through + { + Label isFlattened, done; + __ null_check(r0); + __ test_field_is_flattened(r3, r10, isFlattened); + // No Flattened case + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(done); + __ bind(isFlattened); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flattened_value), r0, r1, r2); + __ bind(done); + } + break; case Bytecodes::_fast_aputfield: do_oop_store(_masm, field, r0, IN_HEAP); break; @@ -3088,6 +3294,32 @@ // access field switch (bytecode()) { + case Bytecodes::_fast_qgetfield: + { + Label isFlattened, isInitialized, Done; + // DMS CHECK: We don't need to reload multiple times, but stay close to original code + __ ldrw(r10, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()))); + __ test_field_is_flattened(r10, r10, isFlattened); + // Non-flattened field case + __ mov(r10, r0); + __ load_heap_oop(r0, field); + __ cbnz(r0, isInitialized); + __ mov(r0, r10); + __ ldrw(r10, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()))); + __ andw(r10, r10, ConstantPoolCacheEntry::field_index_mask); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field), r0, r10); + __ bind(isInitialized); + __ verify_oop(r0); + __ b(Done); + __ bind(isFlattened); + __ ldrw(r10, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()))); + __ andw(r10, r10, ConstantPoolCacheEntry::field_index_mask); + __ ldr(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f1_offset()))); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flattened_field), r0, r10, r3); + __ verify_oop(r0); + __ bind(Done); + } + break; case Bytecodes::_fast_agetfield: do_oop_load(_masm, field, r0, IN_HEAP); __ verify_oop(r0); @@ -3645,6 +3877,30 @@ __ membar(Assembler::StoreStore); } +void TemplateTable::defaultvalue() { + transition(vtos, atos); + __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); + __ get_constant_pool(c_rarg1); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::defaultvalue), + c_rarg1, c_rarg2); + __ verify_oop(r0); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(Assembler::StoreStore); +} + +void TemplateTable::withfield() { + transition(vtos, atos); + resolve_cache_and_index(f2_byte, c_rarg1 /*cache*/, c_rarg2 /*index*/, sizeof(u2)); + + // n.b. 
unlike x86 cache is now rcpool plus the indexed offset + // so using rcpool to meet shared code expectations + + call_VM(r1, CAST_FROM_FN_PTR(address, InterpreterRuntime::withfield), rcpool); + __ verify_oop(r1); + __ add(esp, esp, r0); + __ mov(r0, r1); +} + void TemplateTable::newarray() { transition(itos, atos); __ load_unsigned_byte(c_rarg1, at_bcp(1)); @@ -3716,14 +3972,29 @@ __ bind(ok_is_subtype); __ mov(r0, r3); // Restore object in r3 + __ b(done); + __ bind(is_null); + // Collect counts on whether this test sees NULLs a lot or not. if (ProfileInterpreter) { - __ b(done); - __ bind(is_null); __ profile_null_seen(r2); - } else { - __ bind(is_null); // same as 'done' } + + if (EnableValhalla) { + // Get cpool & tags index + __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array + __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index + // See if bytecode has already been quicked + __ add(rscratch1, r3, Array::base_offset_in_bytes()); + __ lea(r1, Address(rscratch1, r19)); + __ ldarb(r1, r1); + // See if CP entry is a Q-descriptor + __ andr (r1, r1, JVM_CONSTANT_QDESC_BIT); + __ cmp(r1, (u1) JVM_CONSTANT_QDESC_BIT); + __ br(Assembler::NE, done); + __ b(ExternalAddress(Interpreter::_throw_NullPointerException_entry)); + } + __ bind(done); }
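Several hunks above (MacroAssembler::test_flat_array_klass, the array-tag assert in c1_Runtime1, and the aaload/aastore paths) key off the array tag stored in the topmost bits of Klass::_layout_helper, which the patch reads with an arithmetic shift by Klass::_lh_array_tag_shift and compares against Klass::_lh_array_tag_vt_value. A minimal standalone sketch of that decoding, with placeholder shift and tag values (the real constants live in klass.hpp; the numbers below are assumptions for the example):

#include <cstdint>

namespace sketch {
  // Assumed encoding for illustration only.
  constexpr int     lh_array_tag_shift      = 30; // tag sits in the sign bits, hence the arithmetic shift
  constexpr int32_t lh_array_tag_type_value = -1; // primitive (typeArray) tag
  constexpr int32_t lh_array_tag_obj_value  = -2; // object (objArray) tag
  constexpr int32_t lh_array_tag_vt_value   = -3; // flattened value-type array tag

  // Mirrors test_flat_array_klass: shift the layout helper right (asrw), then compare the tag.
  inline bool is_flat_value_array(int32_t layout_helper) {
    return (layout_helper >> lh_array_tag_shift) == lh_array_tag_vt_value;
  }
}

test_flat_array_oop is just load_klass followed by this check, which is why the interpreter paths above need only one temporary register.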