--- old/./.hgtags 2019-01-24 17:46:27.546382712 +0000 +++ new/./.hgtags 2019-01-24 17:46:26.822350532 +0000 @@ -535,3 +535,4 @@ 50677f43ac3df9a8684222b8893543c60f3aa0bd jdk-13+2 de9fd809bb475401aad188eab2264226788aad81 jdk-12+26 642346a11059b9f283110dc301a24ed43b76a94e jdk-13+3 +266942398494aec1ccaae0f3ec9e34e20c2747f8 AArch64_support_start --- old/src/hotspot/cpu/aarch64/aarch64.ad 2019-01-24 17:46:29.754480850 +0000 +++ new/src/hotspot/cpu/aarch64/aarch64.ad 2019-01-24 17:46:29.042449204 +0000 @@ -1955,6 +1955,30 @@ } //============================================================================= +#ifndef PRODUCT +void MachVVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const +{ + st->print_cr("MachVVEPNode"); +} +#endif + +void MachVVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const +{ + // Unpack all value type args passed as oop and then jump to + // the verified entry point (skipping the unverified entry). + MacroAssembler _masm(&cbuf); + + __ unpack_value_args(ra_->C); + __ b(*_verified_entry); +} + +uint MachVVEPNode::size(PhaseRegAlloc* ra_) const +{ + return MachNode::size(ra_); // too many variables; just compute it the hard way +} + + +//============================================================================= #ifndef PRODUCT void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const @@ -2367,7 +2391,7 @@ { int gps = 0; int fps = 0; - const TypeTuple *domain = tf->domain(); + const TypeTuple *domain = tf->domain_cc(); int max = domain->cnt(); for (int i = TypeFunc::Parms; i < max; i++) { const Type *t = domain->field_at(i); @@ -7885,6 +7909,21 @@ ins_pipe(ialu_reg); %} +instruct castN2X(iRegLNoSp dst, iRegN src) %{ + match(Set dst (CastP2X src)); + + ins_cost(INSN_COST); + format %{ "mov $dst, $src\t# ptr -> long" %} + + ins_encode %{ + if ($dst$$reg != $src$$reg) { + __ mov(as_Register($dst$$reg), as_Register($src$$reg)); + } + %} + + ins_pipe(ialu_reg); +%} + instruct castP2X(iRegLNoSp dst, iRegP src) %{ match(Set dst (CastP2X src)); @@ -13398,9 +13437,9 @@ // ============================================================================ // clearing of an array -instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr) +instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr) %{ - match(Set dummy (ClearArray cnt base)); + match(Set dummy (ClearArray (Binary cnt base) val)); effect(USE_KILL cnt, USE_KILL base); ins_cost(4 * INSN_COST); @@ -13413,11 +13452,12 @@ ins_pipe(pipe_class_memory); %} -instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr) +instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr) %{ - predicate((u_int64_t)n->in(2)->get_long() + predicate((u_int64_t)n->in(3)->get_long() < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); - match(Set dummy (ClearArray cnt base)); + + match(Set dummy (ClearArray (Binary cnt base) val)); effect(USE_KILL base); ins_cost(4 * INSN_COST); --- old/src/hotspot/cpu/aarch64/abstractInterpreter_aarch64.cpp 2019-01-24 17:46:32.654609742 +0000 +++ new/src/hotspot/cpu/aarch64/abstractInterpreter_aarch64.cpp 2019-01-24 17:46:31.938577919 +0000 @@ -47,6 +47,7 @@ case T_DOUBLE : i = 8; break; case T_OBJECT : i = 9; break; case T_ARRAY : i = 9; break; + case T_VALUETYPE : i = 10; break; default : ShouldNotReachHere(); } assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, --- old/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp 2019-01-24 
17:46:34.918710366 +0000 +++ new/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp 2019-01-24 17:46:34.158676588 +0000 @@ -175,11 +175,12 @@ // Implementation of NewObjectArrayStub -NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info, bool is_value_type) { _klass_reg = klass_reg; _result = result; _length = length; _info = new CodeEmitInfo(info); + _is_value_type = is_value_type; } @@ -188,7 +189,13 @@ __ bind(_entry); assert(_length->as_register() == r19, "length must in r19,"); assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); - __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); + + if (_is_value_type) { + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_value_array_id))); + } else { + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); + } + ce->add_call_info_here(_info); ce->verify_oop_map(_info); assert(_result->as_register() == r0, "result must in r0"); @@ -196,16 +203,30 @@ } // Implementation of MonitorAccessStubs -MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info, CodeStub* throw_imse_stub, LIR_Opr scratch_reg) : MonitorAccessStub(obj_reg, lock_reg) { _info = new CodeEmitInfo(info); + _scratch_reg = scratch_reg; + _throw_imse_stub = throw_imse_stub; + if (_throw_imse_stub != NULL) { + assert(_scratch_reg != LIR_OprFact::illegalOpr, "must be"); + } } void MonitorEnterStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); + if (_throw_imse_stub != NULL) { + // When we come here, _obj_reg has already been checked to be non-null. + Register mark = _scratch_reg->as_register(); + __ ldr(mark, Address(_obj_reg->as_register(), oopDesc::mark_offset_in_bytes())); + __ andr(mark, mark, (u1) markOopDesc::always_locked_pattern); + __ cmp(mark, (u1) markOopDesc::always_locked_pattern); + __ br(Assembler::EQ, *_throw_imse_stub->entry()); + } + ce->store_parameter(_obj_reg->as_register(), 1); ce->store_parameter(_lock_reg->as_register(), 0); Runtime1::StubID enter_id; --- old/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp 2019-01-24 17:46:37.114807966 +0000 +++ new/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp 2019-01-24 17:46:36.406776500 +0000 @@ -45,7 +45,7 @@ Register reg2 = r_2->as_Register(); assert(reg2 == reg, "must be same register"); opr = as_long_opr(reg); - } else if (type == T_OBJECT || type == T_ARRAY) { + } else if (type == T_OBJECT || type == T_ARRAY || type == T_VALUETYPE) { opr = as_oop_opr(reg); } else if (type == T_METADATA) { opr = as_metadata_opr(reg); --- old/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2019-01-24 17:46:39.250902898 +0000 +++ new/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2019-01-24 17:46:38.538871254 +0000 @@ -558,6 +558,7 @@ break; } + case T_VALUETYPE: case T_OBJECT: { if (patch_code == lir_patch_none) { jobject2reg(c->as_jobject(), dest->as_register()); @@ -604,6 +605,7 @@ void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { LIR_Const* c = src->as_constant_ptr(); switch (c->type()) { + case T_VALUETYPE: case T_OBJECT: { if (!
c->as_jobject()) @@ -670,6 +672,7 @@ assert(c->as_jint() == 0, "should be"); insn = &Assembler::strw; break; + case T_VALUETYPE: case T_OBJECT: case T_ARRAY: assert(c->as_jobject() == 0, "should be"); @@ -710,13 +713,13 @@ return; } assert(src->is_single_cpu(), "must match"); - if (src->type() == T_OBJECT) { + if (src->type() == T_OBJECT || src->type() == T_VALUETYPE) { __ verify_oop(src->as_register()); } move_regs(src->as_register(), dest->as_register()); } else if (dest->is_double_cpu()) { - if (src->type() == T_OBJECT || src->type() == T_ARRAY) { + if (src->type() == T_OBJECT || src->type() == T_ARRAY || src->type() == T_VALUETYPE) { // Surprising to me but we can see move of a long to t_object __ verify_oop(src->as_register()); move_regs(src->as_register(), dest->as_register_lo()); @@ -744,7 +747,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { if (src->is_single_cpu()) { - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); __ verify_oop(src->as_register()); } else if (type == T_METADATA || type == T_DOUBLE) { @@ -782,7 +785,7 @@ return; } - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { __ verify_oop(src->as_register()); if (UseCompressedOops && !wide) { @@ -804,6 +807,7 @@ break; } + case T_VALUETYPE: // fall through case T_ARRAY: // fall through case T_OBJECT: // fall through if (UseCompressedOops && !wide) { @@ -857,7 +861,7 @@ assert(dest->is_register(), "should not call otherwise"); if (dest->is_single_cpu()) { - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); __ verify_oop(dest->as_register()); } else if (type == T_METADATA) { @@ -929,7 +933,7 @@ LIR_Address* addr = src->as_address_ptr(); LIR_Address* from_addr = src->as_address_ptr(); - if (addr->base()->type() == T_OBJECT) { + if (addr->base()->type() == T_OBJECT || addr->base()->type() == T_VALUETYPE) { __ verify_oop(addr->base()->as_pointer_register()); } @@ -953,6 +957,7 @@ break; } + case T_VALUETYPE: // fall through case T_ARRAY: // fall through case T_OBJECT: // fall through if (UseCompressedOops && !wide) { @@ -1007,7 +1012,7 @@ ShouldNotReachHere(); } - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { if (UseCompressedOops && !wide) { __ decode_heap_oop(dest->as_register()); } @@ -1210,7 +1215,7 @@ Register len = op->len()->as_register(); __ uxtw(len, len); - if (UseSlowPath || + if (UseSlowPath || op->type() == T_VALUETYPE || (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { __ b(*op->stub()->entry()); @@ -1928,10 +1933,10 @@ if (opr2->is_single_cpu()) { // cpu register - cpu register Register reg2 = opr2->as_register(); - if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) { + if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY || opr1->type() == T_VALUETYPE) { __ cmpoop(reg1, reg2); } else { - assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?"); + assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY && opr2->type() != T_VALUETYPE, "cmp int, oop?"); __ cmpw(reg1, reg2); } return; @@ -1958,6 +1963,7 @@ case 
T_ADDRESS: imm = opr2->as_constant_ptr()->as_jint(); break; + case T_VALUETYPE: case T_OBJECT: case T_ARRAY: jobject2reg(opr2->as_constant_ptr()->as_jobject(), rscratch1); @@ -2125,6 +2131,7 @@ } break; case T_LONG: + case T_VALUETYPE: case T_ADDRESS: case T_OBJECT: switch (code) { @@ -2161,6 +2168,7 @@ break; case T_LONG: case T_ADDRESS: + case T_VALUETYPE: case T_OBJECT: switch (code) { case lir_shl: __ lsl (dreg, lreg, count); break; @@ -2889,6 +2897,7 @@ case T_INT: case T_LONG: case T_OBJECT: + case T_VALUETYPE: type = 1; break; case T_FLOAT: @@ -3155,6 +3164,7 @@ xchg = &MacroAssembler::atomic_xchgal; add = &MacroAssembler::atomic_addal; break; + case T_VALUETYPE: case T_OBJECT: case T_ARRAY: if (UseCompressedOops) { --- old/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp 2019-01-24 17:46:41.551005118 +0000 +++ new/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp 2019-01-24 17:46:40.862974541 +0000 @@ -35,6 +35,7 @@ #include "ci/ciArray.hpp" #include "ci/ciObjArrayKlass.hpp" #include "ci/ciTypeArrayKlass.hpp" +#include "ci/ciValueKlass.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" #include "vmreg_aarch64.inline.hpp" @@ -332,7 +333,7 @@ LIR_Opr lock = new_register(T_INT); // Need a scratch register for biased locking LIR_Opr scratch = LIR_OprFact::illegalOpr; - if (UseBiasedLocking) { + if (UseBiasedLocking || x->maybe_valuetype()) { scratch = new_register(T_INT); } @@ -340,11 +341,17 @@ if (x->needs_null_check()) { info_for_exception = state_for(x); } + + CodeStub* throw_imse_stub = + x->maybe_valuetype() ? + new SimpleExceptionStub(Runtime1::throw_illegal_monitor_state_exception_id, LIR_OprFact::illegalOpr, state_for(x)) : + NULL; + // this CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expect object to be unlocked) CodeEmitInfo* info = state_for(x, x->state(), true); monitor_enter(obj.result(), lock, syncTempOpr(), scratch, - x->monitor_no(), info_for_exception, info); + x->monitor_no(), info_for_exception, info, throw_imse_stub); } @@ -1153,6 +1160,22 @@ __ move(reg, result); } +void LIRGenerator::do_NewValueTypeInstance (NewValueTypeInstance* x) { + // Mapping to do_NewInstance (same code) + CodeEmitInfo* info = state_for(x, x->state()); + x->set_to_object_type(); + LIR_Opr reg = result_register_for(x->type()); + new_instance(reg, x->klass(), x->is_unresolved(), + FrameMap::r2_oop_opr, + FrameMap::r5_oop_opr, + FrameMap::r4_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::r3_metadata_opr, info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); + +} + void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { CodeEmitInfo* info = state_for(x, x->state()); @@ -1198,13 +1221,20 @@ length.load_item_force(FrameMap::r19_opr); LIR_Opr len = length.result(); - CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + // DMS CHECK: Should we allocate slow path after BAILOUT? 
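// Note: the stub is created with is_value_type == false; Runtime1::new_object_array also covers value arrays (see the c1_Runtime1 comment below), while the fast path selects between T_VALUETYPE and T_OBJECT once the element klass is known.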
+ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info, false); + ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); if (obj == ciEnv::unloaded_ciobjarrayklass()) { BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); } klass2reg_with_patching(klass_reg, obj, patching_info); - __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + if (obj->is_value_array_klass()) { + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_VALUETYPE, klass_reg, slow_path); + } else { + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + } LIR_Opr result = rlock_result(x); __ move(reg, result); @@ -1298,10 +1328,13 @@ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { tmp3 = new_register(objectType); } + + __ checkcast(reg, obj.result(), x->klass(), new_register(objectType), new_register(objectType), tmp3, x->direct_compare(), info_for_exception, patching_info, stub, - x->profiled_method(), x->profiled_bci()); + x->profiled_method(), x->profiled_bci(), x->is_never_null()); + } void LIRGenerator::do_InstanceOf(InstanceOf* x) { --- old/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp 2019-01-24 17:46:43.711101115 +0000 +++ new/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp 2019-01-24 17:46:43.027070716 +0000 @@ -83,6 +83,12 @@ ldr(hdr, Address(obj, hdr_offset)); // and mark it as unlocked orr(hdr, hdr, markOopDesc::unlocked_value); + + if (EnableValhalla && !UseBiasedLocking) { + // Mask always_locked bit such that we go to the slow path if object is a value type + andr(hdr, hdr, ~markOopDesc::biased_lock_bit_in_place); + } + // save unlocked object header into the displaced header location on the stack str(hdr, Address(disp_hdr, 0)); // test if object header is still the same (i.e. unlocked), and if so, store the --- old/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp 2019-01-24 17:46:45.851196222 +0000 +++ new/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp 2019-01-24 17:46:45.159165468 +0000 @@ -772,6 +772,7 @@ case new_type_array_id: case new_object_array_id: + case new_value_array_id: { Register length = r19; // Incoming Register klass = r3; // Incoming @@ -779,9 +780,13 @@ if (id == new_type_array_id) { __ set_info("new_type_array", dont_gc_arguments); - } else { + } + else if (id == new_object_array_id) { __ set_info("new_object_array", dont_gc_arguments); } + else { + __ set_info("new_value_array", dont_gc_arguments); + } #ifdef ASSERT // assert object type is really an array of the proper kind @@ -790,9 +795,14 @@ Register t0 = obj; __ ldrw(t0, Address(klass, Klass::layout_helper_offset())); __ asrw(t0, t0, Klass::_lh_array_tag_shift); - int tag = ((id == new_type_array_id) - ? 
Klass::_lh_array_tag_type_value - : Klass::_lh_array_tag_obj_value); + + int tag = 0; + switch (id) { + case new_type_array_id: tag = Klass::_lh_array_tag_type_value; break; + case new_object_array_id: tag = Klass::_lh_array_tag_obj_value; break; + case new_value_array_id: tag = Klass::_lh_array_tag_vt_value; break; + default: ShouldNotReachHere(); + } __ mov(rscratch1, tag); __ cmpw(t0, rscratch1); __ br(Assembler::EQ, ok); @@ -852,6 +862,7 @@ if (id == new_type_array_id) { call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); } else { + // Runtime1::new_object_array handles both object and value arrays call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); } @@ -926,11 +937,17 @@ break; case throw_incompatible_class_change_error_id: - { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + { StubFrame f(sasm, "throw_incompatible_class_change_exception", dont_gc_arguments); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); } break; + case throw_illegal_monitor_state_exception_id: + { StubFrame f(sasm, "throw_illegal_monitor_state_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_illegal_monitor_state_exception), false); + } + break; + case slow_subtype_check_id: { // Typical calling sequence: @@ -1122,8 +1139,7 @@ } break; - - default: + default: // DMS CHECK: we come here with id:0 and id:32 during VM intialization, should it be fixed? { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); __ mov(r0, (int)id); __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0); @@ -1132,6 +1148,8 @@ break; } } + + return oop_maps; } --- old/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp 2019-01-24 17:46:48.007292038 +0000 +++ new/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp 2019-01-24 17:46:47.315261284 +0000 @@ -40,7 +40,7 @@ define_pd_global(bool, PreferInterpreterNativeStubs, false); define_pd_global(bool, ProfileTraps, false); define_pd_global(bool, UseOnStackReplacement, true ); -define_pd_global(bool, TieredCompilation, false); +define_pd_global(bool, TieredCompilation, true); #ifdef BUILTIN_SIM // We compile very aggressively with the builtin simulator because // doing so greatly reduces run times and tests more code. 
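The value-type locking and acmp changes above and below (MonitorEnterStub::emit_code, C1's lock_object, the interpreter's lock_object and if_acmp) all hinge on the same test: a value type's mark word permanently carries the "always locked" bit pattern, so any attempt to lock it, or to compare it with a plain pointer acmp, is diverted to a slow path. A minimal standalone sketch of the intended predicate, with placeholder constants (the authoritative names are markOopDesc::always_locked_pattern and the lock-bit masks in markOop.hpp; the numeric values here are assumptions for illustration only):

#include <cstdint>

namespace sketch {
  // Assumed values, not HotSpot's real encoding.
  constexpr uintptr_t lock_mask_in_place    = 0x7; // low lock bits of the mark word
  constexpr uintptr_t always_locked_pattern = 0x5; // pattern permanently present in a value type's header

  // The check the stubs intend to emit: a bitwise AND over the lock bits
  // (not a logical &&), then an equality compare against the pattern.
  inline bool is_always_locked(uintptr_t mark) {
    return (mark & lock_mask_in_place) == always_locked_pattern;
  }
}

When the predicate is true, monitorenter raises IllegalMonitorStateException, and if_acmpeq/ne treats the operand as not reference-equal rather than comparing raw pointers.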
--- old/src/hotspot/cpu/aarch64/frame_aarch64.cpp 2019-01-24 17:46:50.131386431 +0000 +++ new/src/hotspot/cpu/aarch64/frame_aarch64.cpp 2019-01-24 17:46:49.423354967 +0000 @@ -593,6 +593,7 @@ } switch (type) { + case T_VALUETYPE : case T_OBJECT : case T_ARRAY : { oop obj; --- old/src/hotspot/cpu/aarch64/globals_aarch64.hpp 2019-01-24 17:46:52.275481712 +0000 +++ new/src/hotspot/cpu/aarch64/globals_aarch64.hpp 2019-01-24 17:46:51.587451137 +0000 @@ -68,6 +68,7 @@ define_pd_global(bool, PreserveFramePointer, false); define_pd_global(bool, ValueTypePassFieldsAsArgs, false); +define_pd_global(bool, ValueTypeReturnedAsFields, false); // GC Ergo Flags define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread --- old/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp 2019-01-24 17:46:54.371574859 +0000 +++ new/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp 2019-01-24 17:46:53.683544284 +0000 @@ -35,6 +35,7 @@ #include "oops/markOop.hpp" #include "oops/method.hpp" #include "oops/methodData.hpp" +#include "oops/valueKlass.hpp" #include "prims/jvmtiExport.hpp" #include "prims/jvmtiThreadState.hpp" #include "runtime/basicLock.hpp" @@ -672,6 +673,8 @@ bind(no_reserved_zone_enabling); } + + // DMS CHECK: ValueTypeReturnedAsFields support should be here // remove frame anchor leave(); // If we're returning to interpreted code we will shortly be @@ -725,6 +728,11 @@ // Save (object->mark() | 1) into BasicLock's displaced header str(swap_reg, Address(lock_reg, mark_offset)); + if (EnableValhalla && !UseBiasedLocking) { // DMS CHECK + // For slow path is_always_locked, using biased, which is never natural for !UseBiasLocking + andr(swap_reg, swap_reg, ~markOopDesc::biased_lock_bit_in_place); + } + assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); --- old/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp 2019-01-24 17:46:56.583673159 +0000 +++ new/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp 2019-01-24 17:46:55.871641518 +0000 @@ -256,6 +256,10 @@ } } +void InterpreterRuntime::SignatureHandlerGenerator::pass_valuetype() { + pass_object(); +} + void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { // generate code to handle arguments iterate(fingerprint); @@ -349,6 +353,11 @@ } } + virtual void pass_valuetype() { + // values are handled with oops, like objects + pass_object(); + } + virtual void pass_float() { jint from_obj = *(jint*)(_from+Interpreter::local_offset_in_bytes(0)); --- old/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp 2019-01-24 17:46:58.939777858 +0000 +++ new/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp 2019-01-24 17:46:58.071739285 +0000 @@ -44,6 +44,7 @@ void pass_float(); void pass_double(); void pass_object(); + void pass_valuetype(); public: // Creation --- old/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp 2019-01-24 17:47:01.247880422 +0000 +++ new/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp 2019-01-24 17:47:00.387842205 +0000 @@ -1302,7 +1302,11 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { - if (!VerifyOops) return; + if (!VerifyOops || VerifyAdapterSharing) { + // Below address of the code string confuses VerifyAdapterSharing + // because it may differ between otherwise equivalent adapters. 
+ return; + } // Pass register number to verify_oop_subroutine const char* b = NULL; @@ -1332,7 +1336,11 @@ } void MacroAssembler::verify_oop_addr(Address addr, const char* s) { - if (!VerifyOops) return; + if (!VerifyOops || VerifyAdapterSharing) { + // Below address of the code string confuses VerifyAdapterSharing + // because it may differ between otherwise equivalent adapters. + return; + } const char* b = NULL; { @@ -1435,6 +1443,10 @@ call_VM_leaf_base(entry_point, 3); } +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { pass_arg0(this, arg_0); MacroAssembler::call_VM_leaf_base(entry_point, 1); @@ -1484,6 +1496,39 @@ } } +void MacroAssembler::test_klass_is_value(Register klass, Register temp_reg, Label& is_value) { + ldrw(temp_reg, Address(klass, Klass::access_flags_offset())); + andr(temp_reg, temp_reg, JVM_ACC_VALUE); + cbnz(temp_reg, is_value); +} + +void MacroAssembler::test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable) { + (void) temp_reg; // keep signature uniform with x86 + tbnz(flags, ConstantPoolCacheEntry::is_flattenable_field_shift, is_flattenable); +} + +void MacroAssembler::test_field_is_not_flattenable(Register flags, Register temp_reg, Label& not_flattenable) { + (void) temp_reg; // keep signature uniform with x86 + tbz(flags, ConstantPoolCacheEntry::is_flattenable_field_shift, not_flattenable); +} + +void MacroAssembler::test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened) { + (void) temp_reg; // keep signature uniform with x86 + tbnz(flags, ConstantPoolCacheEntry::is_flattened_field_shift, is_flattened); +} + +void MacroAssembler::test_flat_array_klass(Register klass, Register temp_reg, Label& is_flattened) { + ldrw(temp_reg, Address(klass, Klass::layout_helper_offset())); + asrw(temp_reg, temp_reg, Klass::_lh_array_tag_shift); + cmpw(temp_reg, Klass::_lh_array_tag_vt_value); + br(Assembler::EQ, is_flattened); +} + +void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg, Label& is_flattened) { + load_klass(temp_reg, oop); + test_flat_array_klass(temp_reg, temp_reg, is_flattened); +} + // MacroAssembler protected routines needed to implement // public methods @@ -5850,3 +5895,10 @@ pop(saved_regs, sp); } + +// DMS TODO ValueType MachVVEPNode support +void MacroAssembler::unpack_value_args(Compile* C) { + // Not implemented + guarantee(false, "Support for MachVVEPNode is not implemented"); +} + --- old/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp 2019-01-24 17:47:03.803994005 +0000 +++ new/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp 2019-01-24 17:47:03.067961299 +0000 @@ -27,6 +27,8 @@ #define CPU_AARCH64_VM_MACROASSEMBLER_AARCH64_HPP #include "asm/assembler.hpp" +#include "runtime/signature.hpp" + // MacroAssembler extends Assembler by frequently used macros. 
// @@ -584,6 +586,18 @@ static bool needs_explicit_null_check(intptr_t offset); static bool uses_implicit_null_check(void* address); + void test_klass_is_value(Register klass, Register temp_reg, Label& is_value); + + void test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable); + void test_field_is_not_flattenable(Register flags, Register temp_reg, Label& notFlattenable); + void test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened); + + // Check klass/oops is flat value type array (oop->_klass->_layout_helper & vt_bit) + void test_flat_array_klass(Register klass, Register temp_reg, Label& is_flat_array); + void test_flat_array_oop(Register oop, Register temp_reg, Label& is_flat_array); + + + static address target_addr_for_insn(address insn_addr, unsigned insn); static address target_addr_for_insn(address insn_addr) { unsigned insn = *(unsigned*)insn_addr; @@ -1141,6 +1155,9 @@ void adrp(Register reg1, const Address &dest, unsigned long &byte_offset); + // Support for MachVVEPNode + void unpack_value_args(Compile* C); + void tableswitch(Register index, jint lowbound, jint highbound, Label &jumptable, Label &jumptable_end, int stride = 1) { adr(rscratch1, jumptable); --- old/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp 2019-01-24 17:47:06.116096744 +0000 +++ new/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp 2019-01-24 17:47:05.396064749 +0000 @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "classfile/symbolTable.hpp" #include "code/debugInfoRec.hpp" #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" @@ -289,6 +290,7 @@ case T_OBJECT: case T_ARRAY: case T_ADDRESS: + case T_VALUETYPE: if (int_args < Argument::n_int_register_parameters_j) { regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); } else { @@ -322,6 +324,89 @@ return align_up(stk_args, 2); } +const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1; +const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; + +int SharedRuntime::java_return_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed) { + + // Create the mapping between argument positions and + // registers. + static const Register INT_ArgReg[java_return_convention_max_int] = { + j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7 + }; + static const FloatRegister FP_ArgReg[java_return_convention_max_float] = { + j_farg0, j_farg1, j_farg2, j_farg3, + j_farg4, j_farg5, j_farg6, j_farg7 + }; + + + uint int_args = 0; + uint fp_args = 0; + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_int_register_parameters_j) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + int_args ++; + } else { + // Should we have gurantee here? + return -1; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_LONG: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + // Should T_METADATA be added to java_calling_convention as well ? 
+ case T_METADATA: + case T_VALUETYPE: + if (int_args < Argument::n_int_register_parameters_j) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + int_args ++; + } else { + return -1; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + fp_args ++; + } else { + return -1; + } + break; + case T_DOUBLE: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); + fp_args ++; + } else { + return -1; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return int_args + fp_args; +} + // Patch the callers callsite with entry to compiled code if it exists. static void patch_callers_callsite(MacroAssembler *masm) { Label L; @@ -352,12 +437,23 @@ __ bind(L); } +// For each value type argument, sig includes the list of fields of +// the value type. This utility function computes the number of +// arguments for the call if value types are passed by reference (the +// calling convention the interpreter expects). +static int compute_total_args_passed_int(const GrowableArray* sig_extended) { + guarantee(ValueTypePassFieldsAsArgs == false, "Support for ValValueTypePassFieldsAsArgs = true is not implemented"); + + int total_args_passed = 0; + total_args_passed = sig_extended->length(); + return total_args_passed; +} + static void gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig_extended, const VMRegPair *regs, Label& skip_fixup) { + // Before we get into the guts of the C2I adapter, see if we should be here // at all. We've come from compiled code and are attempting to jump to the // interpreter, which means the caller made a static call to get here @@ -372,20 +468,21 @@ // Since all args are passed on the stack, total_args_passed * // Interpreter::stackElementSize is the space we need. + int total_args_passed = compute_total_args_passed_int(sig_extended); int extraspace = total_args_passed * Interpreter::stackElementSize; __ mov(r13, sp); // stack is aligned, keep it that way - extraspace = align_up(extraspace, 2*wordSize); - + extraspace = align_up(extraspace, 2 * wordSize); if (extraspace) __ sub(sp, sp, extraspace); // Now write the args into the outgoing interpreter space for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + BasicType bt = sig_extended->at(i)._bt; + if (bt == T_VOID) { + //DMS TODO assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); continue; } @@ -414,9 +511,7 @@ } if (r_1->is_stack()) { // memory to memory use rscratch1 - int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size - + extraspace - + words_pushed * wordSize); + int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace + words_pushed * wordSize); if (!r_2->is_valid()) { // sign extend?? 
__ ldrw(rscratch1, Address(sp, ld_off)); @@ -428,7 +523,7 @@ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + if ( bt == T_LONG || bt == T_DOUBLE) { // ld_off == LSW, ld_off+wordSize == MSW // st_off == MSW, next_off == LSW __ str(rscratch1, Address(sp, next_off)); @@ -450,7 +545,7 @@ } else { // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + if ( bt == T_LONG || bt == T_DOUBLE) { // long/double in gpr #ifdef ASSERT // Overwrite the unused slot with known junk @@ -486,11 +581,11 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray* sig, const VMRegPair *regs) { + // Note: r13 contains the senderSP on entry. We must preserve it since // we may do a i2c -> c2i transition if we lose a race where compiled // code goes non-entrant while we get args ready. @@ -571,10 +666,13 @@ } #endif // INCLUDE_JVMCI + int total_args_passed = compute_total_args_passed_int(sig); + // Now generate the shuffle code. for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + BasicType bt = sig->at(i)._bt; + if (bt == T_VOID) { + //DMS TODO: assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); continue; } @@ -583,7 +681,7 @@ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); // Load in argument order going down. - int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize; + int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; // Point to interpreter value (vs. tag) int next_off = ld_off - Interpreter::stackElementSize; // @@ -614,8 +712,7 @@ // are accessed as negative so LSW is at LOW address // ld_off is MSW so get LSW - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off; __ ldr(rscratch2, Address(esp, offset)); // st_off is LSW (i.e. reg.first()) __ str(rscratch2, Address(sp, st_off)); @@ -630,8 +727,7 @@ // So we must adjust where to pick up the data to match the // interpreter. - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? 
next_off : ld_off; // this can be a misaligned move __ ldr(r, Address(esp, offset)); @@ -730,11 +826,14 @@ // --------------------------------------------------------------- AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { + int comp_args_on_stack_cc, + const GrowableArray* sig, + const VMRegPair* regs, + const GrowableArray* sig_cc, + const VMRegPair* regs_cc, + AdapterFingerPrint* fingerprint, + AdapterBlob*& new_adapter) { address i2c_entry = __ pc(); #ifdef BUILTIN_SIM char *name = NULL; @@ -750,7 +849,7 @@ sim->notifyCompile(name, i2c_entry); } #endif - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + gen_i2c_adapter(masm, comp_args_on_stack_cc, sig_cc, regs_cc); address c2i_unverified_entry = __ pc(); Label skip_fixup; @@ -790,6 +889,7 @@ } address c2i_entry = __ pc(); + address c2i_value_entry = c2i_entry; #ifdef BUILTIN_SIM if (name) { @@ -800,10 +900,20 @@ } #endif - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup); __ flush(); - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); + + OopMapSet* oop_maps = NULL; + + int frame_complete = CodeOffsets::frame_never_safe; + int frame_size_in_words = 0; + + // The c2i adapter might safepoint and trigger a GC. The caller must make sure that + // the GC knows about the location of oop argument locations passed to the c2i adapter. + bool caller_must_gc_arguments = (regs != regs_cc); + new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_value_entry, c2i_unverified_entry); } int SharedRuntime::c_calling_convention(const BasicType *sig_bt, @@ -3194,3 +3304,101 @@ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); } #endif // COMPILER2_OR_JVMCI + +BufferedValueTypeBlob* SharedRuntime::generate_buffered_value_type_adapter(const ValueKlass* vk) { + BufferBlob* buf = BufferBlob::create("value types pack/unpack", 16 * K); + CodeBuffer buffer(buf); + short buffer_locs[20]; + buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, + sizeof(buffer_locs)/sizeof(relocInfo)); + + MacroAssembler _masm(&buffer); + MacroAssembler* masm = &_masm; + + const Array* sig_vk = vk->extended_sig(); + const Array* regs = vk->return_regs(); + + int pack_fields_off = __ offset(); + + int j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_VALUETYPE) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + VMReg r_2 = pair.second(); + Address to(r0, off); + if (bt == T_FLOAT) { + __ strs(r_1->as_FloatRegister(), to); + } else if (bt == T_DOUBLE) { + __ strd(r_1->as_FloatRegister(), to); + } else if (bt == T_OBJECT || bt == T_ARRAY) { + __ lea(r_1->as_Register(), to); + } else { + assert(is_java_primitive(bt), "unexpected basic type"); + size_t size_in_bytes = type2aelembytes(bt); + __ store_sized_value(to, r_1->as_Register(), size_in_bytes); + } + j++; + } + assert(j == regs->length(), "missed a field?"); 
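// The loop above pairs each entry of ValueKlass::extended_sig() with a register from ValueKlass::return_regs(): T_VALUETYPE entries are markers and are skipped, a T_VOID entry stands for the second half of the preceding long/double and only advances the register index j, and the final assert checks that every register pair was consumed. The unpack_fields code that follows walks the same signature in the opposite direction.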
+ + __ ret(r0); + + int unpack_fields_off = __ offset(); + + j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_VALUETYPE) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + VMReg r_2 = pair.second(); + Address from(r0, off); + if (bt == T_FLOAT) { + __ ldrs(r_1->as_FloatRegister(), from); + } else if (bt == T_DOUBLE) { + __ ldrd(r_1->as_FloatRegister(), from); + } else if (bt == T_OBJECT || bt == T_ARRAY) { + __ lea(r_1->as_Register(), from); + } else { + assert(is_java_primitive(bt), "unexpected basic type"); + size_t size_in_bytes = type2aelembytes(bt); + __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); + } + j++; + } + assert(j == regs->length(), "missed a field?"); + + // DMS CHECK: + if (StressValueTypeReturnedAsFields) { + __ load_klass(r0, r0); + __ orr(r0, r0, 1); + } + + __ ret(r0); + + __ flush(); + + return BufferedValueTypeBlob::create(&buffer, pack_fields_off, unpack_fields_off); +} --- old/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp 2019-01-24 17:47:08.784215301 +0000 +++ new/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp 2019-01-24 17:47:07.824172642 +0000 @@ -316,7 +316,7 @@ return_address = __ pc(); // store result depending on type (everything that is not - // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + // T_OBJECT, T_VALUETYPE, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) // n.b. this assumes Java returns an integral result in r0 // and a floating result in j_farg0 __ ldr(j_rarg2, result); @@ -324,6 +324,8 @@ __ ldr(j_rarg1, result_type); __ cmp(j_rarg1, (u1)T_OBJECT); __ br(Assembler::EQ, is_long); + __ cmp(j_rarg1, (u1)T_VALUETYPE); + __ br(Assembler::EQ, is_long); __ cmp(j_rarg1, (u1)T_LONG); __ br(Assembler::EQ, is_long); __ cmp(j_rarg1, (u1)T_FLOAT); --- old/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp 2019-01-24 17:47:11.244324614 +0000 +++ new/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp 2019-01-24 17:47:10.528292797 +0000 @@ -566,6 +566,7 @@ case T_VOID : /* nothing to do */ break; case T_FLOAT : /* nothing to do */ break; case T_DOUBLE : /* nothing to do */ break; + case T_VALUETYPE: // fall through (value types are handled with oops) case T_OBJECT : // retrieve result from frame __ ldr(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); --- old/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp 2019-01-24 17:47:13.532426282 +0000 +++ new/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp 2019-01-24 17:47:12.816394466 +0000 @@ -170,6 +170,7 @@ Label L_patch_done; switch (bc) { + case Bytecodes::_fast_qputfield: case Bytecodes::_fast_aputfield: case Bytecodes::_fast_bputfield: case Bytecodes::_fast_zputfield: @@ -808,11 +809,21 @@ // r0: array // r1: index index_check(r0, r1); // leaves index in r1, kills rscratch1 - __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); - do_oop_load(_masm, - Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), - r0, - IS_ARRAY); + if (EnableValhalla && ValueArrayFlatten) { + Label is_flat_array, done; + + __ test_flat_array_oop(r0, r10 /*temp*/, is_flat_array); + __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY); 
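// Non-flat arrays take the ordinary oop load above; a flat value array has no element oops to hand out, so the is_flat_array path below calls InterpreterRuntime::value_array_load, which returns a buffered copy of the element in r0.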
+ + __ b(done); + __ bind(is_flat_array); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::value_array_load), r0, r1); + __ bind(done); + } else { + __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY); + } } void TemplateTable::baload() @@ -1109,17 +1120,26 @@ Address element_address(r3, r4, Address::uxtw(LogBytesPerHeapOop)); index_check(r3, r2); // kills r1 - __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + + // DMS CHECK: what does line below do? + __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); // do array store check - check for NULL value first __ cbz(r0, is_null); + // Load array klass to r1, check if it is flat and bail out to ususal way + Label is_flat_array; + if (ValueArrayFlatten) { + __ load_klass(r1, r3); + __ test_flat_array_klass(r1, r10 /*temp*/, is_flat_array); + } + // Move subklass into r1 __ load_klass(r1, r0); // Move superklass into r0 __ load_klass(r0, r3); __ ldr(r0, Address(r0, - ObjArrayKlass::element_klass_offset())); + ObjArrayKlass::element_klass_offset())); // Compress array + index*oopSize + 12 into a single register. Frees r2. // Generate subtype check. Blows r2, r5 @@ -1143,8 +1163,64 @@ __ bind(is_null); __ profile_null_seen(r2); + if (EnableValhalla) { + Label is_null_into_value_array_npe, store_null; + + __ load_klass(r0, r3); + // No way to store null in flat array + __ test_flat_array_klass(r0, r1, is_null_into_value_array_npe); + + // Use case for storing values in objArray where element_klass is specifically + // a value type because they could not be flattened "for reasons", + // these need to have the same semantics as flat arrays, i.e. NPE + __ ldr(r0, Address(r0, ObjArrayKlass::element_klass_offset())); + __ test_klass_is_value(r0, r1, is_null_into_value_array_npe); + __ b(store_null); + + __ bind(is_null_into_value_array_npe); + __ b(ExternalAddress(Interpreter::_throw_NullPointerException_entry)); + + __ bind(store_null); + } + // Store a NULL do_oop_store(_masm, element_address, noreg, IS_ARRAY); + __ b(done); + + + if (EnableValhalla) { + // r0 - value, r2 - index, r3 - array. r1 - loaded array klass + // store non-null value + __ bind(is_flat_array); + + // Simplistic type check... + Label is_type_ok; + + // Profile the not-null value's klass. + // Load value class + __ load_klass(r10, r0); + __ profile_typecheck(r2, r1, r0); // blows r2, and r0 + + // flat value array needs exact type match + // is "r10 == r0" (value subclass == array element superclass) + + // Move element klass into r0 + __ ldr(r0, Address(r1, ArrayKlass::element_klass_offset())); + __ cmp(r0, r10); + __ br(Assembler::EQ, is_type_ok); + + __ profile_typecheck_failed(r2); + __ b(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry)); + __ bind(is_type_ok); + + // DMS CHECK: Reload from TOS to be safe, + // DMS CHECK: Because of profile_typecheck that blows r2 and r0. Should we really do it? 
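// InterpreterRuntime::value_array_store(value, array, index) writes the value's fields into the flattened element, so no oop store is performed for a flat array; the operands are re-read from the expression stack below because the profiling code above clobbers r0 and r2.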
+ __ ldr(r1, at_tos()); // value + __ mov(r2, r3); // array + __ ldr(r3, at_tos_p1()); // index + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::value_array_store), r1, r2, r3); + } + // Pop stack arguments __ bind(done); @@ -2025,10 +2101,25 @@ { transition(atos, vtos); // assume branch is more often taken than not (loops use backward branches) - Label not_taken; + Label taken, not_taken; __ pop_ptr(r1); __ cmpoop(r1, r0); + + if (EnableValhalla) { + guarantee(UsePointerPerturbation == false, "UsePointerPerturbation is not implemented"); + + __ br(Assembler::NE, (cc == not_equal) ? taken : not_taken); + __ cbz(r1, (cc == equal) ? taken : not_taken); + __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes())); + // DMS CHECK: Is code below correct? + __ andr(r2, r2, markOopDesc::always_locked_pattern && 0xF); + __ cmp(r2, (u1) markOopDesc::always_locked_pattern); + cc = (cc == equal) ? not_equal : equal; + } + + __ br(j_not(cc), not_taken); + __ bind(taken); branch(false, false); __ bind(not_taken); __ profile_not_taken_branch(r0); @@ -2283,7 +2374,7 @@ __ narrow(r0); } - __ remove_activation(state); + __ remove_activation(state); __ ret(lr); } @@ -2497,8 +2588,7 @@ // x86 uses a shift and mask or wings it with a shift plus assert // the mask is not needed. aarch64 just uses bitfield extract - __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift, - ConstantPoolCacheEntry::tos_state_bits); + __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits); assert(btos == 0, "change code, btos != 0"); __ cbnz(flags, notByte); @@ -2533,12 +2623,68 @@ __ cmp(flags, (u1)atos); __ br(Assembler::NE, notObj); // atos - do_oop_load(_masm, field, r0, IN_HEAP); - __ push(atos); - if (rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + if (!EnableValhalla) { + do_oop_load(_masm, field, r0, IN_HEAP); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + } + __ b(Done); + } else { // Valhalla + + if (is_static) { + __ load_heap_oop(r0, field); + Label isFlattenable, isUninitialized; + // Issue below if the static field has not been initialized yet + __ test_field_is_flattenable(raw_flags, r10, isFlattenable); + // Not flattenable case + __ push(atos); + __ b(Done); + // Flattenable case, must not return null even if uninitialized + __ bind(isFlattenable); + __ cbz(r0, isUninitialized); + __ push(atos); + __ b(Done); + __ bind(isUninitialized); + __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_static_value_field), obj, raw_flags); + __ verify_oop(r0); + __ push(atos); + __ b(Done); + } else { + Label isFlattened, isInitialized, isFlattenable, rewriteFlattenable; + __ test_field_is_flattenable(raw_flags, r10, isFlattenable); + // Non-flattenable field case, also covers the object case + __ load_heap_oop(r0, field); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + } + __ b(Done); + __ bind(isFlattenable); + __ test_field_is_flattened(raw_flags, r10, isFlattened); + // Non-flattened field case + __ load_heap_oop(r0, field); + __ cbnz(r0, isInitialized); + __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field), obj, raw_flags); + __ bind(isInitialized); + __ verify_oop(r0); + __ push(atos); + __ 
b(rewriteFlattenable); + __ bind(isFlattened); + __ ldr(r10, Address(cache, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f1_offset()))); + __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flattened_field), obj, raw_flags, r10); + __ verify_oop(r0); + __ push(atos); + __ bind(rewriteFlattenable); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_qgetfield, bc, r1); + } + __ b(Done); + } } - __ b(Done); __ bind(notObj); __ cmp(flags, (u1)itos); @@ -2708,6 +2854,7 @@ const Register obj = r2; const Register off = r19; const Register flags = r0; + const Register flags2 = r6; const Register bc = r4; resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); @@ -2730,6 +2877,8 @@ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; + __ mov(flags2, flags); + // x86 uses a shift and mask or wings it with a shift plus assert // the mask is not needed. aarch64 just uses bitfield extract __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits); @@ -2772,14 +2921,56 @@ // atos { - __ pop(atos); - if (!is_static) pop_and_check_object(obj); - // Store into the field - do_oop_store(_masm, field, r0, IN_HEAP); - if (rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no); - } - __ b(Done); + if (!EnableValhalla) { + __ pop(atos); + if (!is_static) pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, r0, IN_HEAP); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no); + } + __ b(Done); + } else { // Valhalla + + __ pop(atos); + if (is_static) { + Label notFlattenable; + __ test_field_is_not_flattenable(flags2, r10, notFlattenable); + __ null_check(r0); + __ bind(notFlattenable); + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(Done); + } else { + Label isFlattenable, isFlattened, notBuffered, notBuffered2, rewriteNotFlattenable, rewriteFlattenable; + __ test_field_is_flattenable(flags2, r10, isFlattenable); + // Not flattenable case, covers not flattenable values and objects + pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, r0, IN_HEAP); + __ bind(rewriteNotFlattenable); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, r19, true, byte_no); + } + __ b(Done); + // Implementation of the flattenable semantic + __ bind(isFlattenable); + __ null_check(r0); + __ test_field_is_flattened(flags2, r10, isFlattened); + // Not flattened case + pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(rewriteFlattenable); + __ bind(isFlattened); + pop_and_check_object(obj); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flattened_value), r0, off, obj); + __ bind(rewriteFlattenable); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_qputfield, bc, r19, true, byte_no); + } + __ b(Done); + } + } // Valhalla } __ bind(notObj); @@ -2919,6 +3110,7 @@ // to do it for every data type, we use the saved values as the // jvalue object. 
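// _fast_qputfield is listed alongside _fast_aputfield in both switches below: a flattenable (Q-type) field write keeps its value as an oop on TOS, so it is saved and restored exactly like an aputfield around the JVMTI field-modification callout.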
switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_qputfield: //fall through case Bytecodes::_fast_aputfield: __ push_ptr(r0); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -2945,6 +3137,7 @@ r19, c_rarg2, c_rarg3); switch (bytecode()) { // restore tos values + case Bytecodes::_fast_qputfield: //fall through case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -2995,6 +3188,19 @@ // access field switch (bytecode()) { + case Bytecodes::_fast_qputfield: //fall through + { + Label isFlattened, done; + __ null_check(r0); + __ test_field_is_flattened(r3, r10, isFlattened); + // No Flattened case + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(done); + __ bind(isFlattened); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flattened_value), r0, r1, r2); + __ bind(done); + } + break; case Bytecodes::_fast_aputfield: do_oop_store(_masm, field, r0, IN_HEAP); break; @@ -3088,6 +3294,32 @@ // access field switch (bytecode()) { + case Bytecodes::_fast_qgetfield: + { + Label isFlattened, isInitialized, Done; + // DMS CHECK: We don't need to reload multiple times, but stay close to original code + __ ldrw(r10, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()))); + __ test_field_is_flattened(r10, r10, isFlattened); + // Non-flattened field case + __ mov(r10, r0); + __ load_heap_oop(r0, field); + __ cbnz(r0, isInitialized); + __ mov(r0, r10); + __ ldrw(r10, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()))); + __ andw(r10, r10, ConstantPoolCacheEntry::field_index_mask); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field), r0, r10); + __ bind(isInitialized); + __ verify_oop(r0); + __ b(Done); + __ bind(isFlattened); + __ ldrw(r10, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()))); + __ andw(r10, r10, ConstantPoolCacheEntry::field_index_mask); + __ ldr(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f1_offset()))); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flattened_field), r0, r10, r3); + __ verify_oop(r0); + __ bind(Done); + } + break; case Bytecodes::_fast_agetfield: do_oop_load(_masm, field, r0, IN_HEAP); __ verify_oop(r0); @@ -3645,6 +3877,30 @@ __ membar(Assembler::StoreStore); } +void TemplateTable::defaultvalue() { + transition(vtos, atos); + __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); + __ get_constant_pool(c_rarg1); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::defaultvalue), + c_rarg1, c_rarg2); + __ verify_oop(r0); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(Assembler::StoreStore); +} + +void TemplateTable::withfield() { + transition(vtos, atos); + resolve_cache_and_index(f2_byte, c_rarg1 /*cache*/, c_rarg2 /*index*/, sizeof(u2)); + + // n.b. 
unlike x86 cache is now rcpool plus the indexed offset + // so using rcpool to meet shared code expectations + + call_VM(r1, CAST_FROM_FN_PTR(address, InterpreterRuntime::withfield), rcpool); + __ verify_oop(r1); + __ add(esp, esp, r0); + __ mov(r0, r1); +} + void TemplateTable::newarray() { transition(itos, atos); __ load_unsigned_byte(c_rarg1, at_bcp(1)); @@ -3716,14 +3972,29 @@ __ bind(ok_is_subtype); __ mov(r0, r3); // Restore object in r3 + __ b(done); + __ bind(is_null); + // Collect counts on whether this test sees NULLs a lot or not. if (ProfileInterpreter) { - __ b(done); - __ bind(is_null); __ profile_null_seen(r2); - } else { - __ bind(is_null); // same as 'done' } + + if (EnableValhalla) { + // Get cpool & tags index + __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array + __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index + // See if bytecode has already been quicked + __ add(rscratch1, r3, Array::base_offset_in_bytes()); + __ lea(r1, Address(rscratch1, r19)); + __ ldarb(r1, r1); + // See if CP entry is a Q-descriptor + __ andr (r1, r1, JVM_CONSTANT_QDESC_BIT); + __ cmp(r1, (u1) JVM_CONSTANT_QDESC_BIT); + __ br(Assembler::NE, done); + __ b(ExternalAddress(Interpreter::_throw_NullPointerException_entry)); + } + __ bind(done); }
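Several hunks above (MacroAssembler::test_flat_array_klass, the array-tag assert in c1_Runtime1, and the aaload/aastore paths) key off the array tag stored in the topmost bits of Klass::_layout_helper, which the patch reads with an arithmetic shift by Klass::_lh_array_tag_shift and compares against Klass::_lh_array_tag_vt_value. A minimal standalone sketch of that decoding, with placeholder shift and tag values (the real constants live in klass.hpp; the numbers below are assumptions for the example):

#include <cstdint>

namespace sketch {
  // Assumed encoding for illustration only.
  constexpr int     lh_array_tag_shift      = 30; // tag sits in the sign bits, hence the arithmetic shift
  constexpr int32_t lh_array_tag_type_value = -1; // primitive (typeArray) tag
  constexpr int32_t lh_array_tag_obj_value  = -2; // object (objArray) tag
  constexpr int32_t lh_array_tag_vt_value   = -3; // flattened value-type array tag

  // Mirrors test_flat_array_klass: shift the layout helper right (asrw), then compare the tag.
  inline bool is_flat_value_array(int32_t layout_helper) {
    return (layout_helper >> lh_array_tag_shift) == lh_array_tag_vt_value;
  }
}

test_flat_array_oop is just load_klass followed by this check, which is why the interpreter paths above need only one temporary register.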