--- old/src/hotspot/cpu/aarch64/aarch64.ad	2019-07-12 11:38:06.831852070 +0000
+++ new/src/hotspot/cpu/aarch64/aarch64.ad	2019-07-12 11:38:05.751806721 +0000
@@ -1753,27 +1753,8 @@
   Compile* C = ra_->C;
   MacroAssembler _masm(&cbuf);
 
-  // n.b. frame size includes space for return pc and rfp
-  const long framesize = C->frame_size_in_bytes();
-  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");
-
-  // insert a nop at the start of the prolog so we can patch in a
-  // branch if we need to invalidate the method later
-  __ nop();
-
-  int bangsize = C->bang_size_in_bytes();
-  if (C->need_stack_bang(bangsize) && UseStackBanging)
-    __ generate_stack_overflow_check(bangsize);
-
-  __ build_frame(framesize);
-
-  if (NotifySimulator) {
-    __ notify(Assembler::method_entry);
-  }
-
-  if (VerifyStackAtCalls) {
-    Unimplemented();
-  }
+  __ verified_entry(C, 0);
+  __ bind(*_verified_entry);
 
   C->set_frame_complete(cbuf.insts_size());
 
@@ -2094,8 +2075,46 @@
   return 4;
 }
 
-//=============================================================================
+//=============================================================================
+#ifndef PRODUCT
+void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
+{
+  st->print_cr("# MachVEPNode");
+  if (!_verified) {
+    st->print_cr("\t load_class");
+  } else {
+    st->print_cr("\t unpack_value_arg");
+  }
+}
+#endif
+
+void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+{
+  MacroAssembler _masm(&cbuf);
+
+  if (!_verified) {
+    Label skip;
+    __ cmp_klass(j_rarg0, rscratch2, rscratch1);
+    __ br(Assembler::EQ, skip);
+    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+    __ bind(skip);
+  } else {
+    // Unpack value type args passed as oop and then jump to
+    // the verified entry point (skipping the unverified entry).
+    __ unpack_value_args(ra_->C, _receiver_only);
+    __ b(*_verified_entry);
+  }
+}
+
+
+uint MachVEPNode::size(PhaseRegAlloc* ra_) const
+{
+  return MachNode::size(ra_); // too many variables; just compute it the hard way
+}
+
+
+//=============================================================================
 #ifndef PRODUCT
 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 {
@@ -2117,9 +2136,11 @@
 {
   // This is the unverified entry point.
   MacroAssembler _masm(&cbuf);
+  Label skip;
 
+  // UseCompressedClassPointers logic is inside cmp_klass
   __ cmp_klass(j_rarg0, rscratch2, rscratch1);
-  Label skip;
+
   // TODO
   // can we avoid this skip and still use a reloc?
   __ br(Assembler::EQ, skip);
@@ -2507,7 +2528,7 @@
 {
   int gps = 0;
   int fps = 0;
-  const TypeTuple *domain = tf->domain();
+  const TypeTuple *domain = tf->domain_cc();
   int max = domain->cnt();
   for (int i = TypeFunc::Parms; i < max; i++) {
     const Type *t = domain->field_at(i);
@@ -8182,6 +8203,21 @@
   ins_pipe(ialu_reg);
 %}
 
+instruct castN2X(iRegLNoSp dst, iRegN src) %{
+  match(Set dst (CastP2X src));
+
+  ins_cost(INSN_COST);
+  format %{ "mov $dst, $src\t# ptr -> long" %}
+
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
 instruct castP2X(iRegLNoSp dst, iRegP src) %{
   match(Set dst (CastP2X src));
 
@@ -8197,6 +8233,37 @@
   ins_pipe(ialu_reg);
 %}
 
+instruct castN2I(iRegINoSp dst, iRegN src) %{
+  match(Set dst (CastN2I src));
+
+  ins_cost(INSN_COST);
+  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
+
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movw(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct castI2N(iRegNNoSp dst, iRegI src) %{
+  match(Set dst (CastI2N src));
+
+  ins_cost(INSN_COST);
+  format %{ "movw $dst, $src\t# int -> compressed ptr" %}
+
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movw(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+
 // Convert oop into int for vectors alignment masking
 instruct convP2I(iRegINoSp dst, iRegP src) %{
   match(Set dst (ConvL2I (CastP2X src)));
@@ -13686,33 +13753,16 @@
 // ============================================================================
 // clearing of an array
 
-instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
+instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
 %{
-  match(Set dummy (ClearArray cnt base));
+  match(Set dummy (ClearArray (Binary cnt base) val));
   effect(USE_KILL cnt, USE_KILL base);
 
   ins_cost(4 * INSN_COST);
-  format %{ "ClearArray $cnt, $base" %}
-
-  ins_encode %{
-    __ zero_words($base$$Register, $cnt$$Register);
-  %}
-
-  ins_pipe(pipe_class_memory);
-%}
-
-instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
-%{
-  predicate((u_int64_t)n->in(2)->get_long()
-            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
-  match(Set dummy (ClearArray cnt base));
-  effect(USE_KILL base);
-
-  ins_cost(4 * INSN_COST);
-  format %{ "ClearArray $cnt, $base" %}
+  format %{ "ClearArray $cnt, $base, $val" %}
 
   ins_encode %{
-    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
+    __ fill_words($base$$Register, $cnt$$Register, $val$$Register);
   %}
 
   ins_pipe(pipe_class_memory);
--- old/src/hotspot/cpu/aarch64/abstractInterpreter_aarch64.cpp	2019-07-12 11:38:09.791976361 +0000
+++ new/src/hotspot/cpu/aarch64/abstractInterpreter_aarch64.cpp	2019-07-12 11:38:08.607926644 +0000
@@ -47,6 +47,7 @@
   case T_DOUBLE : i = 8; break;
   case T_OBJECT : i = 9; break;
   case T_ARRAY  : i = 9; break;
+  case T_VALUETYPE : i = 10; break;
   default       : ShouldNotReachHere();
   }
   assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers,
--- old/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp	2019-07-12 11:38:11.944066725 +0000
+++ new/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp	2019-07-12 11:38:10.828019863 +0000
@@ -118,6 +118,76 @@
 #endif
 }
 
+// Implementation of LoadFlattenedArrayStub
+
+LoadFlattenedArrayStub::LoadFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) {
+  _array = array;
+  _index = index;
+  _result
= result; + _scratch_reg = FrameMap::r0_oop_opr; + _info = new CodeEmitInfo(info); +} + +void LoadFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_array->as_register(), 1); + ce->store_parameter(_index->as_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::load_flattened_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + if (_result->as_register() != r0) { + __ mov(_result->as_register(), r0); + } + __ b(_continuation); +} + + +// Implementation of StoreFlattenedArrayStub + +StoreFlattenedArrayStub::StoreFlattenedArrayStub(LIR_Opr array, LIR_Opr index, LIR_Opr value, CodeEmitInfo* info) { + _array = array; + _index = index; + _value = value; + _scratch_reg = FrameMap::r0_oop_opr; + _info = new CodeEmitInfo(info); +} + + +void StoreFlattenedArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_array->as_register(), 2); + ce->store_parameter(_index->as_register(), 1); + ce->store_parameter(_value->as_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::store_flattened_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + +// Implementation of SubstitutabilityCheckStub +SubstitutabilityCheckStub::SubstitutabilityCheckStub(LIR_Opr left, LIR_Opr right, LIR_Opr result, CodeEmitInfo* info) { + _left = left; + _right = right; + _result = result; + _scratch_reg = FrameMap::r0_oop_opr; + _info = new CodeEmitInfo(info); +} + +void SubstitutabilityCheckStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_left->as_register(), 1); + ce->store_parameter(_right->as_register(), 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::substitutability_check_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + if (_result->as_register() != r0) { + __ mov(_result->as_register(), r0); + } + __ b(_continuation); +} // Implementation of NewInstanceStub @@ -134,8 +204,6 @@ _stub_id = stub_id; } - - void NewInstanceStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); @@ -175,11 +243,12 @@ // Implementation of NewObjectArrayStub -NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info, bool is_value_type) { _klass_reg = klass_reg; _result = result; _length = length; _info = new CodeEmitInfo(info); + _is_value_type = is_value_type; } @@ -188,7 +257,13 @@ __ bind(_entry); assert(_length->as_register() == r19, "length must in r19,"); assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); - __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); + + if (_is_value_type) { + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_value_array_id))); + } else { + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); + } + ce->add_call_info_here(_info); ce->verify_oop_map(_info); assert(_result->as_register() == r0, "result must in r0"); @@ -196,16 +271,31 @@ } // Implementation of MonitorAccessStubs -MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) 
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info, CodeStub* throw_imse_stub, LIR_Opr scratch_reg) : MonitorAccessStub(obj_reg, lock_reg) { _info = new CodeEmitInfo(info); + _scratch_reg = scratch_reg; + _throw_imse_stub = throw_imse_stub; + if (_throw_imse_stub != NULL) { + assert(_scratch_reg != LIR_OprFact::illegalOpr, "must be"); + } } void MonitorEnterStub::emit_code(LIR_Assembler* ce) { assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); + if (_throw_imse_stub != NULL) { + // When we come here, _obj_reg has already been checked to be non-null. + __ ldr(rscratch1, Address(_obj_reg->as_register(), oopDesc::mark_offset_in_bytes())); + __ mov(rscratch2, markOopDesc::always_locked_pattern); + __ andr(rscratch1, rscratch1, rscratch2); + + __ cmp(rscratch1, rscratch2); + __ br(Assembler::NE, *_throw_imse_stub->entry()); + } + ce->store_parameter(_obj_reg->as_register(), 1); ce->store_parameter(_lock_reg->as_register(), 0); Runtime1::StubID enter_id; --- old/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp 2019-07-12 11:38:13.996152890 +0000 +++ new/src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp 2019-07-12 11:38:12.956109220 +0000 @@ -45,7 +45,7 @@ Register reg2 = r_2->as_Register(); assert(reg2 == reg, "must be same register"); opr = as_long_opr(reg); - } else if (type == T_OBJECT || type == T_ARRAY) { + } else if (type == T_OBJECT || type == T_ARRAY || type == T_VALUETYPE) { opr = as_oop_opr(reg); } else if (type == T_METADATA) { opr = as_metadata_opr(reg); --- old/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2019-07-12 11:38:16.040238721 +0000 +++ new/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp 2019-07-12 11:38:14.988194546 +0000 @@ -40,6 +40,7 @@ #include "gc/shared/collectedHeap.hpp" #include "nativeInst_aarch64.hpp" #include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" #include "runtime/frame.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "vmreg_aarch64.inline.hpp" @@ -242,7 +243,7 @@ // build frame ciMethod* m = compilation()->method(); - __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes(), needs_stack_repair(), NULL); // OSR buffer is // @@ -452,7 +453,7 @@ // remove the activation and dispatch to the unwind handler __ block_comment("remove_frame and dispatch to the unwind handler"); - __ remove_frame(initial_frame_size_in_bytes()); + __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair()); __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id))); // Emit the slow path assembly @@ -503,8 +504,9 @@ void LIR_Assembler::return_op(LIR_Opr result) { assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,"); + ciMethod* method = compilation()->method(); // Pop the stack before the safepoint code - __ remove_frame(initial_frame_size_in_bytes()); + __ remove_frame(initial_frame_size_in_bytes(), needs_stack_repair()); if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { __ reserved_stack_check(); @@ -515,6 +517,10 @@ __ ret(lr); } +void LIR_Assembler::store_value_type_fields_to_buf(ciValueKlass* vk) { + __ store_value_type_fields_to_buf(vk); +} + int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { address polling_page(os::get_polling_page()); guarantee(info != NULL, "Shouldn't be NULL"); @@ -562,11 +568,12 @@ break; } + case T_VALUETYPE: case T_OBJECT: { - if (patch_code == 
lir_patch_none) { - jobject2reg(c->as_jobject(), dest->as_register()); - } else { + if (patch_code != lir_patch_none) { jobject2reg_with_patching(dest->as_register(), info); + } else { + jobject2reg(c->as_jobject(), dest->as_register()); } break; } @@ -608,6 +615,7 @@ void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { LIR_Const* c = src->as_constant_ptr(); switch (c->type()) { + case T_VALUETYPE: case T_OBJECT: { if (! c->as_jobject()) @@ -674,6 +682,7 @@ assert(c->as_jint() == 0, "should be"); insn = &Assembler::strw; break; + case T_VALUETYPE: // DMS CHECK: the code is significantly differ from x86 case T_OBJECT: case T_ARRAY: assert(c->as_jobject() == 0, "should be"); @@ -714,13 +723,13 @@ return; } assert(src->is_single_cpu(), "must match"); - if (src->type() == T_OBJECT) { + if (src->type() == T_OBJECT || src->type() == T_VALUETYPE) { __ verify_oop(src->as_register()); } move_regs(src->as_register(), dest->as_register()); } else if (dest->is_double_cpu()) { - if (src->type() == T_OBJECT || src->type() == T_ARRAY) { + if (src->type() == T_OBJECT || src->type() == T_ARRAY || src->type() == T_VALUETYPE) { // Surprising to me but we can see move of a long to t_object __ verify_oop(src->as_register()); move_regs(src->as_register(), dest->as_register_lo()); @@ -748,7 +757,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { if (src->is_single_cpu()) { - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); __ verify_oop(src->as_register()); } else if (type == T_METADATA || type == T_DOUBLE) { @@ -786,7 +795,7 @@ return; } - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { __ verify_oop(src->as_register()); if (UseCompressedOops && !wide) { @@ -808,6 +817,7 @@ break; } + case T_VALUETYPE: // fall through case T_ARRAY: // fall through case T_OBJECT: // fall through if (UseCompressedOops && !wide) { @@ -861,7 +871,7 @@ assert(dest->is_register(), "should not call otherwise"); if (dest->is_single_cpu()) { - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); __ verify_oop(dest->as_register()); } else if (type == T_METADATA) { @@ -933,7 +943,7 @@ LIR_Address* addr = src->as_address_ptr(); LIR_Address* from_addr = src->as_address_ptr(); - if (addr->base()->type() == T_OBJECT) { + if (addr->base()->type() == T_OBJECT || addr->base()->type() == T_VALUETYPE) { __ verify_oop(addr->base()->as_pointer_register()); } @@ -957,6 +967,7 @@ break; } + case T_VALUETYPE: // fall through case T_ARRAY: // fall through case T_OBJECT: // fall through if (UseCompressedOops && !wide) { @@ -1011,7 +1022,7 @@ ShouldNotReachHere(); } - if (type == T_ARRAY || type == T_OBJECT) { + if (type == T_ARRAY || type == T_OBJECT || type == T_VALUETYPE) { if (UseCompressedOops && !wide) { __ decode_heap_oop(dest->as_register()); } @@ -1022,11 +1033,28 @@ } } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { if (UseCompressedClassPointers) { + __ andr(dest->as_register(), dest->as_register(), oopDesc::compressed_klass_mask()); __ decode_klass_not_null(dest->as_register()); + } else { + __ ubfm(dest->as_register(), dest->as_register(), 0, 63 - oopDesc::storage_props_nof_bits); } } } +void 
LIR_Assembler::move(LIR_Opr src, LIR_Opr dst) { + assert(dst->is_cpu_register(), "must be"); + assert(dst->type() == src->type(), "must be"); + + if (src->is_cpu_register()) { + reg2reg(src, dst); + } else if (src->is_stack()) { + stack2reg(src, dst, dst->type()); + } else if (src->is_constant()) { + const2reg(src, dst, lir_patch_none, NULL); + } else { + ShouldNotReachHere(); + } +} int LIR_Assembler::array_element_size(BasicType type) const { int elem_size = type2aelembytes(type); @@ -1218,7 +1246,7 @@ Register len = op->len()->as_register(); __ uxtw(len, len); - if (UseSlowPath || + if (UseSlowPath || op->type() == T_VALUETYPE || (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { __ b(*op->stub()->entry()); @@ -1530,6 +1558,122 @@ } } +void LIR_Assembler::emit_opFlattenedArrayCheck(LIR_OpFlattenedArrayCheck* op) { + // We are loading/storing an array that *may* be a flattened array (the declared type + // Object[], interface[], or VT?[]). If this array is flattened, take slow path. + + __ load_storage_props(op->tmp()->as_register(), op->array()->as_register()); + __ tst(op->tmp()->as_register(), ArrayStorageProperties::flattened_value); + __ br(Assembler::NE, *op->stub()->entry()); + if (!op->value()->is_illegal()) { + // We are storing into the array. + Label skip; + __ tst(op->tmp()->as_register(), ArrayStorageProperties::null_free_value); + __ br(Assembler::EQ, skip); + // The array is not flattened, but it is null_free. If we are storing + // a null, take the slow path (which will throw NPE). + __ cbz(op->value()->as_register(), *op->stub()->entry()); + __ bind(skip); + } + +} + +void LIR_Assembler::emit_opNullFreeArrayCheck(LIR_OpNullFreeArrayCheck* op) { + // This is called when we use aastore into a an array declared as "[LVT;", + // where we know VT is not flattenable (due to ValueArrayElemMaxFlatOops, etc). + // However, we need to do a NULL check if the actual array is a "[QVT;". + + __ load_storage_props(op->tmp()->as_register(), op->array()->as_register()); + __ mov(rscratch1, (uint64_t) ArrayStorageProperties::null_free_value); + __ cmp(op->tmp()->as_register(), rscratch1); +} + +void LIR_Assembler::emit_opSubstitutabilityCheck(LIR_OpSubstitutabilityCheck* op) { + Label L_oops_equal; + Label L_oops_not_equal; + Label L_end; + + Register left = op->left()->as_register(); + Register right = op->right()->as_register(); + + __ cmp(left, right); + __ br(Assembler::EQ, L_oops_equal); + + // (1) Null check -- if one of the operands is null, the other must not be null (because + // the two references are not equal), so they are not substitutable, + // FIXME: do null check only if the operand is nullable + { + __ cbz(left, L_oops_not_equal); + __ cbz(right, L_oops_not_equal); + } + + + ciKlass* left_klass = op->left_klass(); + ciKlass* right_klass = op->right_klass(); + + // (2) Value object check -- if either of the operands is not a value object, + // they are not substitutable. We do this only if we are not sure that the + // operands are value objects + if ((left_klass == NULL || right_klass == NULL) ||// The klass is still unloaded, or came from a Phi node. 
+ !left_klass->is_valuetype() || !right_klass->is_valuetype()) { + Register tmp1 = rscratch1; /* op->tmp1()->as_register(); */ + Register tmp2 = rscratch2; /* op->tmp2()->as_register(); */ + + __ mov(tmp1, (intptr_t)markOopDesc::always_locked_pattern); + + __ ldr(tmp2, Address(left, oopDesc::mark_offset_in_bytes())); + __ andr(tmp1, tmp1, tmp2); + + __ ldr(tmp2, Address(right, oopDesc::mark_offset_in_bytes())); + __ andr(tmp1, tmp1, tmp2); + + __ mov(tmp2, (intptr_t)markOopDesc::always_locked_pattern); + __ cmp(tmp1, tmp2); + __ br(Assembler::NE, L_oops_not_equal); + } + + // (3) Same klass check: if the operands are of different klasses, they are not substitutable. + if (left_klass != NULL && left_klass->is_valuetype() && left_klass == right_klass) { + // No need to load klass -- the operands are statically known to be the same value klass. + __ b(*op->stub()->entry()); + } else { + Register left_klass_op = op->left_klass_op()->as_register(); + Register right_klass_op = op->right_klass_op()->as_register(); + + // DMS CHECK, likely x86 bug, make aarch64 implementation correct + __ load_klass(left_klass_op, left); + __ load_klass(right_klass_op, right); + __ cmp(left_klass_op, right_klass_op); + __ br(Assembler::EQ, *op->stub()->entry()); // same klass -> do slow check + // fall through to L_oops_not_equal + } + + __ bind(L_oops_not_equal); + move(op->not_equal_result(), op->result_opr()); + __ b(L_end); + + __ bind(L_oops_equal); + move(op->equal_result(), op->result_opr()); + __ b(L_end); + + // We've returned from the stub. op->result_opr() contains 0x0 IFF the two + // operands are not substitutable. (Don't compare against 0x1 in case the + // C compiler is naughty) + __ bind(*op->stub()->continuation()); + + if (op->result_opr()->type() == T_LONG) { + __ cbzw(op->result_opr()->as_register(), L_oops_not_equal); // (call_stub() == 0x0) -> not_equal + } else { + __ cbz(op->result_opr()->as_register(), L_oops_not_equal); // (call_stub() == 0x0) -> not_equal + } + + move(op->equal_result(), op->result_opr()); // (call_stub() != 0x0) -> equal + // fall-through + __ bind(L_end); + +} + + void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { __ cmpxchg(addr, cmpval, newval, Assembler::word, /* acquire*/ true, /* release*/ true, /* weak*/ false, rscratch1); __ cset(rscratch1, Assembler::NE); @@ -1940,10 +2084,10 @@ if (opr2->is_single_cpu()) { // cpu register - cpu register Register reg2 = opr2->as_register(); - if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) { + if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY || opr1->type() == T_VALUETYPE) { __ cmpoop(reg1, reg2); } else { - assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?"); + assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY && opr2->type() != T_VALUETYPE, "cmp int, oop?"); __ cmpw(reg1, reg2); } return; @@ -1970,6 +2114,7 @@ case T_ADDRESS: imm = opr2->as_constant_ptr()->as_jint(); break; + case T_VALUETYPE: case T_OBJECT: case T_ARRAY: jobject2reg(opr2->as_constant_ptr()->as_jobject(), rscratch1); @@ -2136,6 +2281,7 @@ } break; case T_LONG: + case T_VALUETYPE: case T_ADDRESS: case T_OBJECT: switch (code) { @@ -2172,6 +2318,7 @@ break; case T_LONG: case T_ADDRESS: + case T_VALUETYPE: case T_OBJECT: switch (code) { case lir_shl: __ lsl (dreg, lreg, count); break; @@ -2216,6 +2363,19 @@ __ str(rscratch1, Address(sp, offset_from_rsp_in_bytes)); } +void LIR_Assembler::arraycopy_valuetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest) { + __ 
load_storage_props(tmp, obj); + if (is_dest) { + // We also take slow path if it's a null_free destination array, just in case the source array + // contains NULLs. + __ tst(tmp, ArrayStorageProperties::flattened_value | ArrayStorageProperties::null_free_value); + } else { + __ tst(tmp, ArrayStorageProperties::flattened_value); + } + __ br(Assembler::NE, *slow_path->entry()); +} + + // This code replaces a call to arraycopy; no exception may // be thrown in this code, they must be thrown in the System.arraycopy @@ -2235,7 +2395,23 @@ CodeStub* stub = op->stub(); int flags = op->flags(); BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; - if (basic_type == T_ARRAY) basic_type = T_OBJECT; + if (basic_type == T_ARRAY || basic_type == T_VALUETYPE) basic_type = T_OBJECT; + + if (flags & LIR_OpArrayCopy::always_slow_path) { + __ b(*stub->entry()); + __ bind(*stub->continuation()); + return; + } + + if (flags & LIR_OpArrayCopy::src_valuetype_check) { + arraycopy_valuetype_check(src, tmp, stub, false); + } + + if (flags & LIR_OpArrayCopy::dst_valuetype_check) { + arraycopy_valuetype_check(dst, tmp, stub, true); + } + + // if we don't know anything, just go through the generic arraycopy if (default_type == NULL // || basic_type == T_OBJECT @@ -2904,6 +3080,7 @@ case T_INT: case T_LONG: case T_OBJECT: + case T_VALUETYPE: type = 1; break; case T_FLOAT: @@ -3170,6 +3347,7 @@ xchg = &MacroAssembler::atomic_xchgal; add = &MacroAssembler::atomic_addal; break; + case T_VALUETYPE: case T_OBJECT: case T_ARRAY: if (UseCompressedOops) { --- old/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp 2019-07-12 11:38:18.312334126 +0000 +++ new/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp 2019-07-12 11:38:17.220288271 +0000 @@ -82,5 +82,7 @@ void store_parameter(Register r, int offset_from_esp_in_words); void store_parameter(jint c, int offset_from_esp_in_words); void store_parameter(jobject c, int offset_from_esp_in_words); + void arraycopy_valuetype_check(Register obj, Register tmp, CodeStub* slow_path, bool is_dest); + void move(LIR_Opr src, LIR_Opr dst); #endif // CPU_AARCH64_C1_LIRASSEMBLER_AARCH64_HPP --- old/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp 2019-07-12 11:38:20.316418279 +0000 +++ new/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp 2019-07-12 11:38:19.252373599 +0000 @@ -35,6 +35,7 @@ #include "ci/ciArray.hpp" #include "ci/ciObjArrayKlass.hpp" #include "ci/ciTypeArrayKlass.hpp" +#include "ci/ciValueKlass.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" #include "vmreg_aarch64.inline.hpp" @@ -101,6 +102,12 @@ } +void LIRGenerator::init_temps_for_substitutability_check(LIR_Opr& tmp1, LIR_Opr& tmp2) { + tmp1 = new_register(T_INT); + tmp2 = LIR_OprFact::illegalOpr; +} + + //--------- loading items into registers -------------------------------- @@ -332,7 +339,7 @@ LIR_Opr lock = new_register(T_INT); // Need a scratch register for biased locking LIR_Opr scratch = LIR_OprFact::illegalOpr; - if (UseBiasedLocking) { + if (UseBiasedLocking || x->maybe_valuetype()) { scratch = new_register(T_INT); } @@ -340,11 +347,17 @@ if (x->needs_null_check()) { info_for_exception = state_for(x); } + + CodeStub* throw_imse_stub = + x->maybe_valuetype() ? 
+ new SimpleExceptionStub(Runtime1::throw_illegal_monitor_state_exception_id, LIR_OprFact::illegalOpr, state_for(x)) : + NULL; + // this CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expect object to be unlocked) CodeEmitInfo* info = state_for(x, x->state(), true); monitor_enter(obj.result(), lock, syncTempOpr(), scratch, - x->monitor_no(), info_for_exception, info); + x->monitor_no(), info_for_exception, info, throw_imse_stub); } @@ -1153,6 +1166,22 @@ __ move(reg, result); } +void LIRGenerator::do_NewValueTypeInstance (NewValueTypeInstance* x) { + // Mapping to do_NewInstance (same code) + CodeEmitInfo* info = state_for(x, x->state()); + x->set_to_object_type(); + LIR_Opr reg = result_register_for(x->type()); + new_instance(reg, x->klass(), x->is_unresolved(), + FrameMap::r2_oop_opr, + FrameMap::r5_oop_opr, + FrameMap::r4_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::r3_metadata_opr, info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); + +} + void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { CodeEmitInfo* info = state_for(x, x->state()); @@ -1198,13 +1227,18 @@ length.load_item_force(FrameMap::r19_opr); LIR_Opr len = length.result(); - CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); - ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + ciKlass* obj = (ciKlass*) x->exact_type(); + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info, x->is_never_null()); if (obj == ciEnv::unloaded_ciobjarrayklass()) { BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); } + klass2reg_with_patching(klass_reg, obj, patching_info); - __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + if (x->is_never_null()) { + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_VALUETYPE, klass_reg, slow_path); + } else { + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + } LIR_Opr result = rlock_result(x); __ move(reg, result); @@ -1280,6 +1314,9 @@ CodeEmitInfo* info_for_exception = (x->needs_exception_state() ? 
state_for(x) : state_for(x, x->state_before(), true /*ignore_xhandler*/)); + if (x->is_never_null()) { + __ null_check(obj.result(), new CodeEmitInfo(info_for_exception)); + } CodeStub* stub; if (x->is_incompatible_class_change_check()) { @@ -1298,10 +1335,13 @@ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { tmp3 = new_register(objectType); } + + __ checkcast(reg, obj.result(), x->klass(), new_register(objectType), new_register(objectType), tmp3, x->direct_compare(), info_for_exception, patching_info, stub, - x->profiled_method(), x->profiled_bci()); + x->profiled_method(), x->profiled_bci(), x->is_never_null()); + } void LIRGenerator::do_InstanceOf(InstanceOf* x) { --- old/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp 2019-07-12 11:38:22.412506295 +0000 +++ new/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp 2019-07-12 11:38:21.360462119 +0000 @@ -28,6 +28,8 @@ #include "c1/c1_Runtime1.hpp" #include "classfile/systemDictionary.hpp" #include "gc/shared/collectedHeap.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" #include "interpreter/interpreter.hpp" #include "oops/arrayOop.hpp" #include "oops/markOop.hpp" @@ -83,6 +85,12 @@ ldr(hdr, Address(obj, hdr_offset)); // and mark it as unlocked orr(hdr, hdr, markOopDesc::unlocked_value); + + if (EnableValhalla && !UseBiasedLocking) { + // Mask always_locked bit such that we go to the slow path if object is a value type + andr(hdr, hdr, ~markOopDesc::biased_lock_bit_in_place); + } + // save unlocked object header into the displaced header location on the stack str(hdr, Address(disp_hdr, 0)); // test if object header is still the same (i.e. unlocked), and if so, store the @@ -330,7 +338,9 @@ } -void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes, bool needs_stack_repair, Label* verified_value_entry_label) { + + // If we have to make this method not-entrant we'll overwrite its // first instruction with a jump. For this action to be legal we // must ensure that this first instruction is a B, BL, NOP, BKPT, @@ -340,23 +350,52 @@ // Make sure there is enough stack space for this method's activation. // Note that we do this before doing an enter(). generate_stack_overflow_check(bang_size_in_bytes); + + guarantee(needs_stack_repair == false, "Stack repair should not be true"); + if (verified_value_entry_label != NULL) { + bind(*verified_value_entry_label); + } + MacroAssembler::build_frame(framesize + 2 * wordSize); if (NotifySimulator) { notify(Assembler::method_entry); } } -void C1_MacroAssembler::remove_frame(int framesize) { +void C1_MacroAssembler::remove_frame(int framesize, bool needs_stack_repair) { + + guarantee(needs_stack_repair == false, "Stack repair should not be true"); + MacroAssembler::remove_frame(framesize + 2 * wordSize); if (NotifySimulator) { notify(Assembler::method_reentry); } } - -void C1_MacroAssembler::verified_entry() { +void C1_MacroAssembler::verified_value_entry() { + if (C1Breakpoint || VerifyFPU || !UseStackBanging) { + // Verified Entry first instruction should be 5 bytes long for correct + // patching by patch_verified_entry(). + // + // C1Breakpoint and VerifyFPU have one byte first instruction. + // Also first instruction will be one byte "push(rbp)" if stack banging + // code is not generated (see build_frame() above). + // For all these cases generate long instruction first. + nop(); + } + + // build frame + // DMS CHECK: is it nop? 
+ // verify_FPU(0, "method_entry"); + +} + +int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature *ces, int frame_size_in_bytes, int bang_size_in_bytes, Label& verified_value_entry_label, bool is_value_ro_entry) { + guarantee(false, "Support for ValueTypePassFieldsAsArgs and ValueTypeReturnedAsFields is not implemented"); + return 0; } + void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { // rbp, + 0: link // + 1: return address --- old/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp 2019-07-12 11:38:24.480593137 +0000 +++ new/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp 2019-07-12 11:38:23.416548456 +0000 @@ -773,6 +773,7 @@ case new_type_array_id: case new_object_array_id: + case new_value_array_id: { Register length = r19; // Incoming Register klass = r3; // Incoming @@ -780,9 +781,13 @@ if (id == new_type_array_id) { __ set_info("new_type_array", dont_gc_arguments); - } else { + } + else if (id == new_object_array_id) { __ set_info("new_object_array", dont_gc_arguments); } + else { + __ set_info("new_value_array", dont_gc_arguments); + } #ifdef ASSERT // assert object type is really an array of the proper kind @@ -791,9 +796,14 @@ Register t0 = obj; __ ldrw(t0, Address(klass, Klass::layout_helper_offset())); __ asrw(t0, t0, Klass::_lh_array_tag_shift); - int tag = ((id == new_type_array_id) - ? Klass::_lh_array_tag_type_value - : Klass::_lh_array_tag_obj_value); + + int tag = 0; + switch (id) { + case new_type_array_id: tag = Klass::_lh_array_tag_type_value; break; + case new_object_array_id: tag = Klass::_lh_array_tag_obj_value; break; + case new_value_array_id: tag = Klass::_lh_array_tag_vt_value; break; + default: ShouldNotReachHere(); + } __ mov(rscratch1, tag); __ cmpw(t0, rscratch1); __ br(Assembler::EQ, ok); @@ -853,6 +863,7 @@ if (id == new_type_array_id) { call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); } else { + // Runtime1::new_object_array handles both object and value arrays call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); } @@ -888,6 +899,84 @@ } break; + case buffer_value_args_id: + case buffer_value_args_no_receiver_id: + { + const char* name = (id == buffer_value_args_id) ? + "buffer_value_args" : "buffer_value_args_no_receiver"; + StubFrame f(sasm, name, dont_gc_arguments); + OopMap* map = save_live_registers(sasm, 2); + Register method = r1; + address entry = (id == buffer_value_args_id) ? 
+ CAST_FROM_FN_PTR(address, buffer_value_args) : + CAST_FROM_FN_PTR(address, buffer_value_args_no_receiver); + int call_offset = __ call_RT(r0, noreg, entry, method); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + __ verify_oop(r0); // r0: an array of buffered value objects + } + break; + + case load_flattened_array_id: + { + StubFrame f(sasm, "load_flattened_array", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, 3); + + // Called with store_parameter and not C abi + + f.load_argument(1, r0); // r0,: array + f.load_argument(0, r1); // r1,: index + int call_offset = __ call_RT(r0, noreg, CAST_FROM_FN_PTR(address, load_flattened_array), r0, r1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + + // r0: loaded element at array[index] + __ verify_oop(r0); + } + break; + + case store_flattened_array_id: + { + StubFrame f(sasm, "store_flattened_array", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, 4); + + // Called with store_parameter and not C abi + + f.load_argument(2, r0); // r0: array + f.load_argument(1, r1); // r1: index + f.load_argument(0, r2); // r2: value + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, store_flattened_array), r0, r1, r2); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + } + break; + + case substitutability_check_id: + { + StubFrame f(sasm, "substitutability_check", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, 3); + + // Called with store_parameter and not C abi + + f.load_argument(1, r0); // r0,: left + f.load_argument(0, r1); // r1,: right + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, substitutability_check), r0, r1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + + // r0,: are the two operands substitutable + } + break; + + + case register_finalizer_id: { __ set_info("register_finalizer", dont_gc_arguments); @@ -927,11 +1016,17 @@ break; case throw_incompatible_class_change_error_id: - { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + { StubFrame f(sasm, "throw_incompatible_class_change_exception", dont_gc_arguments); oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); } break; + case throw_illegal_monitor_state_exception_id: + { StubFrame f(sasm, "throw_illegal_monitor_state_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_illegal_monitor_state_exception), false); + } + break; + case slow_subtype_check_id: { // Typical calling sequence: @@ -1123,8 +1218,10 @@ } break; - default: + // DMS CHECK: This code should be fixed in JDK workspace, because it fails + // with assert during vm intialization rather than insert a call + // to unimplemented_entry { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); __ mov(r0, (int)id); __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0); @@ -1133,6 +1230,8 @@ break; } } + + return oop_maps; } --- old/src/hotspot/cpu/aarch64/frame_aarch64.cpp 2019-07-12 11:38:26.532679307 +0000 +++ new/src/hotspot/cpu/aarch64/frame_aarch64.cpp 2019-07-12 11:38:25.500635970 +0000 @@ -594,6 +594,7 @@ } switch (type) { + case T_VALUETYPE : case T_OBJECT : case T_ARRAY : { oop obj; --- 
old/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp 2019-07-12 11:38:28.488761447 +0000 +++ new/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp 2019-07-12 11:38:27.468718613 +0000 @@ -273,7 +273,12 @@ } void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Address dst, Register val, Register tmp1, Register tmp2) { + Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { + + if (tmp3 == noreg) { + tmp3 = r8; + } + // flatten object address if needed if (dst.index() == noreg && dst.offset() == 0) { if (dst.base() != r3) { @@ -292,7 +297,7 @@ false /* expand_call */); if (val == noreg) { - BarrierSetAssembler::store_at(masm, decorators, type, Address(r3, 0), noreg, noreg, noreg); + BarrierSetAssembler::store_at(masm, decorators, type, Address(r3, 0), noreg, noreg, noreg, noreg); } else { // G1 barrier needs uncompressed oop for region cross check. Register new_val = val; @@ -300,7 +305,7 @@ new_val = rscratch2; __ mov(new_val, val); } - BarrierSetAssembler::store_at(masm, decorators, type, Address(r3, 0), val, noreg, noreg); + BarrierSetAssembler::store_at(masm, decorators, type, Address(r3, 0), val, noreg, noreg, noreg); g1_write_barrier_post(masm, r3 /* store_adr */, new_val /* new_val */, --- old/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp 2019-07-12 11:38:30.480845099 +0000 +++ new/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp 2019-07-12 11:38:29.452801929 +0000 @@ -57,7 +57,7 @@ Register tmp2); virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Address dst, Register val, Register tmp1, Register tmp2); + Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); public: #ifdef COMPILER1 --- old/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp 2019-07-12 11:38:32.528931104 +0000 +++ new/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp 2019-07-12 11:38:31.460886253 +0000 @@ -73,7 +73,7 @@ } void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Address dst, Register val, Register tmp1, Register tmp2) { + Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { bool in_heap = (decorators & IN_HEAP) != 0; bool in_native = (decorators & IN_NATIVE) != 0; switch (type) { @@ -229,3 +229,21 @@ } __ str(t1, Address(rthread, in_bytes(JavaThread::allocated_bytes_offset()))); } + +void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { +// DMS CHECK: 8210498: nmethod entry barriers is not implemented +#if 0 + BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); + if (bs_nm == NULL) { + return; + } + Label continuation; + Address disarmed_addr(rthread, in_bytes(bs_nm->thread_disarmed_offset())); + __ align(8); + __ ldr(rscratch1, disarmed_addr); + __ cbz(rscratch1, continuation); + __ blr(RuntimeAddress(StubRoutines::aarch64::method_entry_barrier())); + __ bind(continuation); +#endif +} + --- old/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp 2019-07-12 11:38:34.593017781 +0000 +++ new/src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp 2019-07-12 11:38:33.480971083 +0000 @@ -43,7 +43,7 @@ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp_thread); virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Address dst, Register 
val, Register tmp1, Register tmp2); + Address dst, Register val, Register tmp1, Register tmp2, Register tmp3 = noreg); virtual void obj_equals(MacroAssembler* masm, Register obj1, Register obj2); @@ -72,6 +72,7 @@ Label& slow_case // continuation point if fast allocation fails ); virtual void barrier_stubs_init() {} + virtual void nmethod_entry_barrier(MacroAssembler* masm); }; #endif // CPU_AARCH64_GC_SHARED_BARRIERSETASSEMBLER_AARCH64_HPP --- old/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp 2019-07-12 11:38:36.701106307 +0000 +++ new/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.cpp 2019-07-12 11:38:35.597059944 +0000 @@ -91,18 +91,26 @@ } void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Address dst, Register val, Register tmp1, Register tmp2) { + Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { bool in_heap = (decorators & IN_HEAP) != 0; bool is_array = (decorators & IS_ARRAY) != 0; bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; bool precise = is_array || on_anonymous; bool needs_post_barrier = val != noreg && in_heap; - BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg, noreg); if (needs_post_barrier) { // flatten object address if needed if (!precise || (dst.index() == noreg && dst.offset() == 0)) { - store_check(masm, dst.base(), dst); + if (tmp3 != noreg) { + // Called by MacroAssembler::pack_value_helper. We cannot corrupt the dst.base() register + __ mov(tmp3, dst.base()); + store_check(masm, tmp3, dst); + } else { + // It's OK to corrupt the dst.base() register. + store_check(masm, dst.base(), dst); + } + } else { __ lea(r3, dst); store_check(masm, r3, dst); --- old/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.hpp 2019-07-12 11:38:38.901198697 +0000 +++ new/src/hotspot/cpu/aarch64/gc/shared/cardTableBarrierSetAssembler_aarch64.hpp 2019-07-12 11:38:37.777151494 +0000 @@ -35,7 +35,7 @@ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register start, Register count, Register tmp, RegSet saved_regs); virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Address dst, Register val, Register tmp1, Register tmp2); + Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); }; --- old/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.cpp 2019-07-12 11:38:41.053289072 +0000 +++ new/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.cpp 2019-07-12 11:38:39.937242205 +0000 @@ -45,10 +45,10 @@ } void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Address dst, Register val, Register tmp1, Register tmp2) { + Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { if (type == T_OBJECT || type == T_ARRAY) { - oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); + oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); } else { - BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); } } --- old/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.hpp 2019-07-12 11:38:43.177378273 +0000 +++ new/src/hotspot/cpu/aarch64/gc/shared/modRefBarrierSetAssembler_aarch64.hpp 2019-07-12 
11:38:42.077332077 +0000 @@ -40,7 +40,7 @@ Register start, Register count, Register tmp, RegSet saved_regs) {} virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Address dst, Register val, Register tmp1, Register tmp2) = 0; + Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) = 0; public: virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, @@ -48,7 +48,7 @@ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, Register start, Register count, Register tmp, RegSet saved_regs); virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, - Address dst, Register val, Register tmp1, Register tmp2); + Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); }; #endif // CPU_AARCH64_GC_SHARED_MODREFBARRIERSETASSEMBLER_AARCH64_HPP --- old/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp 2019-07-12 11:38:45.361469994 +0000 +++ new/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp 2019-07-12 11:38:44.225422285 +0000 @@ -128,7 +128,8 @@ Address dst, Register val, Register tmp1, - Register tmp2) { + Register tmp2, + Register tmp3) { // Verify value if (type == T_OBJECT || type == T_ARRAY) { // Note that src could be noreg, which means we --- old/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp 2019-07-12 11:38:47.561562387 +0000 +++ new/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp 2019-07-12 11:38:46.429514846 +0000 @@ -55,7 +55,8 @@ Address dst, Register val, Register tmp1, - Register tmp2); + Register tmp2, + Register tmp3); #endif // ASSERT virtual void arraycopy_prologue(MacroAssembler* masm, --- old/src/hotspot/cpu/aarch64/globals_aarch64.hpp 2019-07-12 11:38:49.721653102 +0000 +++ new/src/hotspot/cpu/aarch64/globals_aarch64.hpp 2019-07-12 11:38:48.593605729 +0000 @@ -67,6 +67,7 @@ define_pd_global(bool, PreserveFramePointer, false); define_pd_global(bool, ValueTypePassFieldsAsArgs, false); +define_pd_global(bool, ValueTypeReturnedAsFields, false); // GC Ergo Flags define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread --- old/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp 2019-07-12 11:38:51.893744322 +0000 +++ new/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp 2019-07-12 11:38:50.769697116 +0000 @@ -35,6 +35,7 @@ #include "oops/markOop.hpp" #include "oops/method.hpp" #include "oops/methodData.hpp" +#include "oops/valueKlass.hpp" #include "prims/jvmtiExport.hpp" #include "prims/jvmtiThreadState.hpp" #include "runtime/basicLock.hpp" @@ -656,6 +657,7 @@ // get sender esp ldr(esp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); + if (StackReservedPages > 0) { // testing if reserved zone needs to be re-enabled Label no_reserved_zone_enabling; @@ -672,6 +674,7 @@ bind(no_reserved_zone_enabling); } + // remove frame anchor leave(); // If we're returning to interpreted code we will shortly be @@ -725,6 +728,11 @@ // Save (object->mark() | 1) into BasicLock's displaced header str(swap_reg, Address(lock_reg, mark_offset)); + if (EnableValhalla && !UseBiasedLocking) { + // For slow path is_always_locked, using biased, which is never natural for !UseBiasLocking + andr(swap_reg, swap_reg, ~markOopDesc::biased_lock_bit_in_place); + } + assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); --- old/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp 2019-07-12 
11:38:54.157839406 +0000 +++ new/src/hotspot/cpu/aarch64/interpreterRT_aarch64.cpp 2019-07-12 11:38:53.037792368 +0000 @@ -255,6 +255,10 @@ } } +void InterpreterRuntime::SignatureHandlerGenerator::pass_valuetype() { + pass_object(); +} + void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { // generate code to handle arguments iterate(fingerprint); @@ -348,6 +352,11 @@ } } + virtual void pass_valuetype() { + // values are handled with oops, like objects + pass_object(); + } + virtual void pass_float() { jint from_obj = *(jint*)(_from+Interpreter::local_offset_in_bytes(0)); --- old/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp 2019-07-12 11:38:56.357931803 +0000 +++ new/src/hotspot/cpu/aarch64/interpreterRT_aarch64.hpp 2019-07-12 11:38:55.229884428 +0000 @@ -44,6 +44,7 @@ void pass_float(); void pass_double(); void pass_object(); + void pass_valuetype(); public: // Creation --- old/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp 2019-07-12 11:38:58.522022689 +0000 +++ new/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp 2019-07-12 11:38:57.401975650 +0000 @@ -1309,7 +1309,11 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { - if (!VerifyOops) return; + if (!VerifyOops || VerifyAdapterSharing) { + // Below address of the code string confuses VerifyAdapterSharing + // because it may differ between otherwise equivalent adapters. + return; + } // Pass register number to verify_oop_subroutine const char* b = NULL; @@ -1339,7 +1343,11 @@ } void MacroAssembler::verify_oop_addr(Address addr, const char* s) { - if (!VerifyOops) return; + if (!VerifyOops || VerifyAdapterSharing) { + // Below address of the code string confuses VerifyAdapterSharing + // because it may differ between otherwise equivalent adapters. 
+ return; + } const char* b = NULL; { @@ -1442,6 +1450,10 @@ call_VM_leaf_base(entry_point, 3); } +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { pass_arg0(this, arg_0); MacroAssembler::call_VM_leaf_base(entry_point, 1); @@ -1491,6 +1503,39 @@ } } +void MacroAssembler::test_klass_is_value(Register klass, Register temp_reg, Label& is_value) { + ldrw(temp_reg, Address(klass, Klass::access_flags_offset())); + andr(temp_reg, temp_reg, JVM_ACC_VALUE); + cbnz(temp_reg, is_value); +} + +void MacroAssembler::test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable) { + (void) temp_reg; // keep signature uniform with x86 + tbnz(flags, ConstantPoolCacheEntry::is_flattenable_field_shift, is_flattenable); +} + +void MacroAssembler::test_field_is_not_flattenable(Register flags, Register temp_reg, Label& not_flattenable) { + (void) temp_reg; // keep signature uniform with x86 + tbz(flags, ConstantPoolCacheEntry::is_flattenable_field_shift, not_flattenable); +} + +void MacroAssembler::test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened) { + (void) temp_reg; // keep signature uniform with x86 + tbnz(flags, ConstantPoolCacheEntry::is_flattened_field_shift, is_flattened); +} + +void MacroAssembler::test_flattened_array_oop(Register oop, Register temp_reg, Label& is_flattened_array) { + load_storage_props(temp_reg, oop); + andr(temp_reg, temp_reg, ArrayStorageProperties::flattened_value); + cbnz(temp_reg, is_flattened_array); +} + +void MacroAssembler::test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array) { + load_storage_props(temp_reg, oop); + andr(temp_reg, temp_reg, ArrayStorageProperties::null_free_value); + cbnz(temp_reg, is_null_free_array); +} + // MacroAssembler protected routines needed to implement // public methods @@ -3683,15 +3728,24 @@ bs->obj_equals(this, obj1, obj2); } -void MacroAssembler::load_klass(Register dst, Register src) { +void MacroAssembler::load_metadata(Register dst, Register src) { if (UseCompressedClassPointers) { ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes())); - decode_klass_not_null(dst); } else { ldr(dst, Address(src, oopDesc::klass_offset_in_bytes())); } } +void MacroAssembler::load_klass(Register dst, Register src) { + load_metadata(dst, src); + if (UseCompressedClassPointers) { + andr(dst, dst, oopDesc::compressed_klass_mask()); + decode_klass_not_null(dst); + } else { + ubfm(dst, dst, 0, 63 - oopDesc::storage_props_nof_bits); + } +} + // ((OopHandle)result).resolve(); void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { // OopHandle::resolve is an indirection. 
@@ -3707,6 +3761,15 @@ resolve_oop_handle(dst, tmp); } +void MacroAssembler::load_storage_props(Register dst, Register src) { + load_metadata(dst, src); + if (UseCompressedClassPointers) { + asrw(dst, dst, oopDesc::narrow_storage_props_shift); + } else { + asr(dst, dst, oopDesc::wide_storage_props_shift); + } +} + void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) { if (UseCompressedClassPointers) { ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); @@ -4024,14 +4087,14 @@ void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, - Register tmp1, Register thread_tmp) { + Register tmp1, Register thread_tmp, Register tmp3) { BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); decorators = AccessInternal::decorator_fixup(decorators); bool as_raw = (decorators & AS_RAW) != 0; if (as_raw) { - bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp, tmp3); } else { - bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp, tmp3); } } @@ -4055,13 +4118,13 @@ } void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, - Register thread_tmp, DecoratorSet decorators) { - access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); + Register thread_tmp, Register tmp3, DecoratorSet decorators) { + access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp, tmp3); } // Used for storing NULLs. void MacroAssembler::store_heap_oop_null(Address dst) { - access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); + access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); } Address MacroAssembler::allocate_metadata_address(Metadata* obj) { @@ -5865,3 +5928,39 @@ pop(saved_regs, sp); } + +// C2 compiled method's prolog code +// Moved here from aarch64.ad to support Valhalla code belows +void MacroAssembler::verified_entry(Compile* C, int sp_inc) { + +// n.b. frame size includes space for return pc and rfp + const long framesize = C->frame_size_in_bytes(); + assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + + // insert a nop at the start of the prolog so we can patch in a + // branch if we need to invalidate the method later + nop(); + + int bangsize = C->bang_size_in_bytes(); + if (C->need_stack_bang(bangsize) && UseStackBanging) + generate_stack_overflow_check(bangsize); + + build_frame(framesize); + + if (NotifySimulator) { + notify(Assembler::method_entry); + } + + if (VerifyStackAtCalls) { + Unimplemented(); + } +} + +void MacroAssembler::unpack_value_args(Compile* C, bool receiver_only) { + // Called from MachVEP node + unimplemented("Support for ValueTypePassFieldsAsArgs and ValueTypeReturnedAsFields is not implemented"); +} + +void MacroAssembler::store_value_type_fields_to_buf(ciValueKlass* vk) { + super_call_VM_leaf(StubRoutines::store_value_type_fields_to_buf()); +} --- old/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp 2019-07-12 11:39:00.970125504 +0000 +++ new/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp 2019-07-12 11:38:59.850078465 +0000 @@ -29,6 +29,8 @@ #include "asm/assembler.hpp" #include "oops/compressedOops.hpp" +class ciValueKlass; + // MacroAssembler extends Assembler by frequently used macros. 
// // Instructions for which a 'better' code sequence exists depending @@ -585,6 +587,16 @@ static bool needs_explicit_null_check(intptr_t offset); static bool uses_implicit_null_check(void* address); + void test_klass_is_value(Register klass, Register temp_reg, Label& is_value); + + void test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable); + void test_field_is_not_flattenable(Register flags, Register temp_reg, Label& notFlattenable); + void test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened); + + // Check klass/oops is flat value type array (oop->_klass->_layout_helper & vt_bit) + void test_flattened_array_oop(Register klass, Register temp_reg, Label& is_flattened_array); + void test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array); + static address target_addr_for_insn(address insn_addr, unsigned insn); static address target_addr_for_insn(address insn_addr) { unsigned insn = *(unsigned*)insn_addr; @@ -789,6 +801,9 @@ void c2bool(Register x); // oop manipulations + void load_metadata(Register dst, Register src); + void load_storage_props(Register dst, Register src); + void load_klass(Register dst, Register src); void store_klass(Register dst, Register src); void cmp_klass(Register oop, Register trial_klass, Register tmp); @@ -800,7 +815,7 @@ Register tmp1, Register tmp_thread); void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, - Register tmp1, Register tmp_thread); + Register tmp1, Register tmp_thread, Register tmp3 = noreg); // Resolves obj for access. Result is placed in the same register. // All other registers are preserved. @@ -812,7 +827,7 @@ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, Register thread_tmp = noreg, DecoratorSet decorators = 0); void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, - Register tmp_thread = noreg, DecoratorSet decorators = 0); + Register tmp_thread = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0); // currently unimplemented // Used for storing NULL. 
All other oop constants should be @@ -1143,6 +1158,19 @@ void adrp(Register reg1, const Address &dest, unsigned long &byte_offset); + + enum RegState { + reg_readonly, + reg_writable, + reg_written + }; + + void verified_entry(Compile* C, int sp_inc); + +// Unpack all value type arguments passed as oops + void unpack_value_args(Compile* C, bool receiver_only); + void store_value_type_fields_to_buf(ciValueKlass* vk); + void tableswitch(Register index, jint lowbound, jint highbound, Label &jumptable, Label &jumptable_end, int stride = 1) { adr(rscratch1, jumptable); @@ -1235,6 +1263,8 @@ int elem_size); void fill_words(Register base, Register cnt, Register value); + void fill_words(Register base, u_int64_t cnt, Register value); + void zero_words(Register base, u_int64_t cnt); void zero_words(Register ptr, Register cnt); void zero_dcache_blocks(Register base, Register cnt); --- old/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp 2019-07-12 11:39:03.254221432 +0000 +++ new/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp 2019-07-12 11:39:02.146174896 +0000 @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "classfile/symbolTable.hpp" #include "code/debugInfoRec.hpp" #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" @@ -288,6 +289,7 @@ case T_OBJECT: case T_ARRAY: case T_ADDRESS: + case T_VALUETYPE: if (int_args < Argument::n_int_register_parameters_j) { regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); } else { @@ -321,6 +323,90 @@ return align_up(stk_args, 2); } + +// const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1; +const uint SharedRuntime::java_return_convention_max_int = 6; +const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; + +int SharedRuntime::java_return_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed) { + + // Create the mapping between argument positions and + // registers. + // r1, r2 used to address klasses and states, exclude it from return convention to avoid colision + + static const Register INT_ArgReg[java_return_convention_max_int] = { + r0 /* j_rarg7 */, j_rarg6, j_rarg5, j_rarg4, j_rarg3, j_rarg2 + }; + + static const FloatRegister FP_ArgReg[java_return_convention_max_float] = { + j_farg0, j_farg1, j_farg2, j_farg3, j_farg4, j_farg5, j_farg6, j_farg7 + }; + + uint int_args = 0; + uint fp_args = 0; + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_int_register_parameters_j) { + regs[i].set1(INT_ArgReg[int_args]->as_VMReg()); + int_args ++; + } else { + // Should we have gurantee here? + return -1; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_LONG: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + // Should T_METADATA be added to java_calling_convention as well ? 
+ case T_METADATA: + case T_VALUETYPE: + if (int_args < Argument::n_int_register_parameters_j) { + regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); + int_args ++; + } else { + return -1; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set1(FP_ArgReg[fp_args]->as_VMReg()); + fp_args ++; + } else { + return -1; + } + break; + case T_DOUBLE: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); + fp_args ++; + } else { + return -1; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return int_args + fp_args; +} + // Patch the callers callsite with entry to compiled code if it exists. static void patch_callers_callsite(MacroAssembler *masm) { Label L; @@ -351,46 +437,18 @@ __ bind(L); } -static void gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - Label& skip_fixup) { - // Before we get into the guts of the C2I adapter, see if we should be here - // at all. We've come from compiled code and are attempting to jump to the - // interpreter, which means the caller made a static call to get here - // (vcalls always get a compiled target if there is one). Check for a - // compiled target. If there is one, we need to patch the caller's call. - patch_callers_callsite(masm); - - __ bind(skip_fixup); - - int words_pushed = 0; - - // Since all args are passed on the stack, total_args_passed * - // Interpreter::stackElementSize is the space we need. - - int extraspace = total_args_passed * Interpreter::stackElementSize; - - __ mov(r13, sp); +// For each value type argument, sig includes the list of fields of +// the value type. This utility function computes the number of +// arguments for the call if value types are passed by reference (the +// calling convention the interpreter expects). +static int compute_total_args_passed_int(const GrowableArray* sig_extended) { + int total_args_passed = 0; + total_args_passed = sig_extended->length(); + return total_args_passed; +} - // stack is aligned, keep it that way - extraspace = align_up(extraspace, 2*wordSize); - if (extraspace) - __ sub(sp, sp, extraspace); - - // Now write the args into the outgoing interpreter space - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - // offset to start parameters - int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; - int next_off = st_off - Interpreter::stackElementSize; +static void gen_c2i_adapter_helper(MacroAssembler* masm, BasicType bt, const VMRegPair& reg_pair, int extraspace, const Address& to) { // Say 4 args: // i st_off @@ -405,76 +463,122 @@ // leaves one slot empty and only stores to a single slot. In this case the // slot that is occupied is the T_VOID slot. See I said it was confusing. 
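To make the slot layout described above concrete, here is the offset arithmetic the caller of this helper uses (st_off counted down from the top of the outgoing area, next_off one slot lower for the live half of a long/double), written as a standalone check. Interpreter::stackElementSize is assumed to be 8 bytes here; only the relative layout matters.

#include <cstdio>

int main() {
  const int wordSize          = 8;
  const int stackElementSize  = wordSize;  // assumed: one interpreter slot per word
  const int total_args_passed = 4;

  for (int i = 0; i < total_args_passed; i++) {
    int st_off   = (total_args_passed - i - 1) * stackElementSize;
    int next_off = st_off - stackElementSize;
    // For T_LONG/T_DOUBLE the value is stored at next_off and the slot at
    // st_off is the unused T_VOID slot (filled with junk under ASSERT).
    printf("arg %d: st_off = %2d, next_off = %2d\n", i, st_off, next_off);
  }
  return 0;
}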
- VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); + // int next_off = st_off - Interpreter::stackElementSize; + + VMReg r_1 = reg_pair.first(); + VMReg r_2 = reg_pair.second(); + if (!r_1->is_valid()) { assert(!r_2->is_valid(), ""); - continue; + return; } + if (r_1->is_stack()) { // memory to memory use rscratch1 - int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size - + extraspace - + words_pushed * wordSize); + // DMS CHECK: words_pushed is always 0 and can be removed? + // int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace + words_pushed * wordSize); + int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace); if (!r_2->is_valid()) { // sign extend?? __ ldrw(rscratch1, Address(sp, ld_off)); - __ str(rscratch1, Address(sp, st_off)); + __ str(rscratch1, to); } else { - __ ldr(rscratch1, Address(sp, ld_off)); - - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // ld_off == LSW, ld_off+wordSize == MSW - // st_off == MSW, next_off == LSW - __ str(rscratch1, Address(sp, next_off)); -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov(rscratch1, 0xdeadffffdeadaaaaul); - __ str(rscratch1, Address(sp, st_off)); -#endif /* ASSERT */ - } else { - __ str(rscratch1, Address(sp, st_off)); - } + __ str(rscratch1, to); } } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - if (!r_2->is_valid()) { - // must be only an int (or less ) so move only 32bits to slot - // why not sign extend?? - __ str(r, Address(sp, st_off)); - } else { - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // long/double in gpr -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov(rscratch1, 0xdeadffffdeadaaabul); - __ str(rscratch1, Address(sp, st_off)); -#endif /* ASSERT */ - __ str(r, Address(sp, next_off)); - } else { - __ str(r, Address(sp, st_off)); - } - } + Register r = r_1->as_Register(); + __ str(r, to); } else { assert(r_1->is_FloatRegister(), ""); if (!r_2->is_valid()) { // only a float use just part of the slot - __ strs(r_1->as_FloatRegister(), Address(sp, st_off)); + __ strs(r_1->as_FloatRegister(), to); } else { -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov(rscratch1, 0xdeadffffdeadaaacul); - __ str(rscratch1, Address(sp, st_off)); -#endif /* ASSERT */ - __ strd(r_1->as_FloatRegister(), Address(sp, next_off)); + __ strd(r_1->as_FloatRegister(), to); } + } +} + +static void gen_c2i_adapter(MacroAssembler *masm, + const GrowableArray* sig_extended, + const VMRegPair *regs, + Label& skip_fixup, + address start, + OopMapSet* oop_maps, + int& frame_complete, + int& frame_size_in_words, + bool alloc_value_receiver) { + + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. 
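The adapter body that follows sizes the outgoing interpreter argument area as total_args_passed * Interpreter::stackElementSize plus one extra word, rounded up to 2 * wordSize so sp stays 16-byte aligned. A minimal sketch of that computation, assuming 8-byte stack elements and the usual power-of-two align_up:

#include <cassert>

// align_up in the HotSpot sense: round x up to a multiple of a power-of-two alignment.
static long align_up(long x, long alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}

int main() {
  const long wordSize         = 8;
  const long stackElementSize = wordSize;  // assumed
  for (int total_args_passed = 0; total_args_passed <= 6; total_args_passed++) {
    long extraspace = total_args_passed * stackElementSize + wordSize;
    extraspace = align_up(extraspace, 2 * wordSize);
    assert(extraspace % 16 == 0);  // the sub(sp, sp, extraspace) keeps 16-byte alignment
  }
  return 0;
}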
+ patch_callers_callsite(masm); + + __ bind(skip_fixup); + + bool has_value_argument = false; + int words_pushed = 0; + + // Since all args are passed on the stack, total_args_passed * + // Interpreter::stackElementSize is the space we need. + + int total_args_passed = compute_total_args_passed_int(sig_extended); + int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize; + + // stack is aligned, keep it that way + extraspace = align_up(extraspace, 2 * wordSize); + + __ mov(r13, sp); + + if (extraspace) + __ sub(sp, sp, extraspace); + + // Now write the args into the outgoing interpreter space + + int ignored = 0, next_vt_arg = 0, next_arg_int = 0; + bool has_oop_field = false; + + for (int next_arg_comp = 0; next_arg_comp < total_args_passed; next_arg_comp++) { + BasicType bt = sig_extended->at(next_arg_comp)._bt; + // offset to start parameters + int st_off = (total_args_passed - next_arg_int - 1) * Interpreter::stackElementSize; + + if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) { + continue; // Ignore reserved entry } + + if (bt == T_VOID) { + assert(next_arg_comp > 0 && (sig_extended->at(next_arg_comp - 1)._bt == T_LONG || sig_extended->at(next_arg_comp - 1)._bt == T_DOUBLE), "missing half"); + next_arg_int ++; + continue; + } + + int next_off = st_off - Interpreter::stackElementSize; + int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; + + gen_c2i_adapter_helper(masm, bt, regs[next_arg_comp], extraspace, Address(sp, offset)); + next_arg_int ++; + } + +// If a value type was allocated and initialized, apply post barrier to all oop fields + if (has_value_argument && has_oop_field) { + __ push(r13); // save senderSP + __ push(r1); // save callee + // Allocate argument register save area + if (frame::arg_reg_save_area_bytes != 0) { + __ sub(sp, sp, frame::arg_reg_save_area_bytes); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::apply_post_barriers), rthread, r10); + // De-allocate argument register save area + if (frame::arg_reg_save_area_bytes != 0) { + __ add(sp, sp, frame::arg_reg_save_area_bytes); + } + __ pop(r1); // restore callee + __ pop(r13); // restore sender SP } __ mov(esp, sp); // Interp expects args on caller's expression stack @@ -483,12 +587,8 @@ __ br(rscratch1); } +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, int comp_args_on_stack, const GrowableArray* sig, const VMRegPair *regs) { -void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs) { // Note: r13 contains the senderSP on entry. We must preserve it since // we may do a i2c -> c2i transition if we lose a race where compiled @@ -548,10 +648,11 @@ } // Cut-out for having no stack args. - int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; + int comp_words_on_stack = 0; if (comp_args_on_stack) { - __ sub(rscratch1, sp, comp_words_on_stack * wordSize); - __ andr(sp, rscratch1, -16); + comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; + __ sub(rscratch1, sp, comp_words_on_stack * wordSize); + __ andr(sp, rscratch1, -16); } // Will jump to the compiled code just as if compiled code was doing it. @@ -570,19 +671,23 @@ } #endif // INCLUDE_JVMCI + int total_args_passed = sig->length(); + // Now generate the shuffle code. 
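Before the shuffle loop below runs, the i2c path above computes how many words of outgoing compiled arguments live on the stack and then rounds sp down to a 16-byte boundary (the andr with -16). A hedged sketch of that sizing, with VMRegImpl::stack_slot_size assumed to be 4 bytes:

#include <cassert>
#include <cstdint>

static long align_up(long x, long alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}

int main() {
  const int stack_slot_size = 4;  // assumed VMRegImpl::stack_slot_size
  const int wordSize        = 8;
  const int LogBytesPerWord = 3;

  for (int comp_args_on_stack = 1; comp_args_on_stack <= 8; comp_args_on_stack++) {
    int comp_words_on_stack =
        align_up(comp_args_on_stack * stack_slot_size, wordSize) >> LogBytesPerWord;

    // Models: sub(rscratch1, sp, comp_words_on_stack * wordSize); andr(sp, rscratch1, -16);
    uint64_t sp     = 0x7ffff000;  // arbitrary 16-byte aligned starting sp
    uint64_t new_sp = (sp - comp_words_on_stack * wordSize) & ~uint64_t(15);
    assert(new_sp % 16 == 0 && new_sp <= sp - comp_words_on_stack * wordSize);
  }
  return 0;
}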
for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + BasicType bt = sig->at(i)._bt; + + assert(bt != T_VALUETYPE, "i2c adapter doesn't unpack value args"); + if (bt == T_VOID) { + assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half"); continue; } // Pick up 0, 1 or 2 words from SP+offset. + assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); - assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), - "scrambled load targets?"); // Load in argument order going down. - int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize; + int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; // Point to interpreter value (vs. tag) int next_off = ld_off - Interpreter::stackElementSize; // @@ -596,7 +701,7 @@ } if (r_1->is_stack()) { // Convert stack slot to an SP offset (+ wordSize to account for return address ) - int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; + int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; if (!r_2->is_valid()) { // sign extend??? __ ldrsw(rscratch2, Address(esp, ld_off)); @@ -613,39 +718,38 @@ // are accessed as negative so LSW is at LOW address // ld_off is MSW so get LSW - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off; __ ldr(rscratch2, Address(esp, offset)); // st_off is LSW (i.e. reg.first()) - __ str(rscratch2, Address(sp, st_off)); - } - } else if (r_1->is_Register()) { // Register argument - Register r = r_1->as_Register(); - if (r_2->is_valid()) { - // - // We are using two VMRegs. This can be either T_OBJECT, - // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates - // two slots but only uses one for thr T_LONG or T_DOUBLE case - // So we must adjust where to pick up the data to match the - // interpreter. + __ str(rscratch2, Address(sp, st_off)); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + if (r_2->is_valid()) { + // + // We are using two VMRegs. This can be either T_OBJECT, + // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates + // two slots but only uses one for thr T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the + // interpreter. + + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off; + + // this can be a misaligned move + __ ldr(r, Address(esp, offset)); + } else { + // sign extend and use a full word? + __ ldrw(r, Address(esp, ld_off)); + } + } else { + if (!r_2->is_valid()) { + __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off)); + } else { + __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off)); + } + } + } - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; - - // this can be a misaligned move - __ ldr(r, Address(esp, offset)); - } else { - // sign extend and use a full word? - __ ldrw(r, Address(esp, ld_off)); - } - } else { - if (!r_2->is_valid()) { - __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off)); - } else { - __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off)); - } - } - } // 6243940 We might end up in handle_wrong_method if // the callee is deoptimized as we race thru here. If that @@ -658,7 +762,6 @@ // and the vm will find there should this case occur. 
__ str(rmethod, Address(rthread, JavaThread::callee_target_offset())); - __ br(rscratch1); } @@ -727,32 +830,7 @@ } #endif -// --------------------------------------------------------------- -AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { - address i2c_entry = __ pc(); -#ifdef BUILTIN_SIM - char *name = NULL; - AArch64Simulator *sim = NULL; - size_t len = 65536; - if (NotifySimulator) { - name = NEW_C_HEAP_ARRAY(char, len, mtInternal); - } - - if (name) { - generate_i2c_adapter_name(name, total_args_passed, sig_bt); - sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck); - sim->notifyCompile(name, i2c_entry); - } -#endif - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); - - address c2i_unverified_entry = __ pc(); - Label skip_fixup; +static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) { Label ok; @@ -788,21 +866,77 @@ __ block_comment("} c2i_unverified_entry"); } - address c2i_entry = __ pc(); -#ifdef BUILTIN_SIM - if (name) { - name[0] = 'c'; - name[2] = 'i'; - sim->notifyCompile(name, c2i_entry); - FREE_C_HEAP_ARRAY(char, name, mtInternal); +} + + + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int comp_args_on_stack, + const GrowableArray* sig, + const VMRegPair* regs, + const GrowableArray* sig_cc, + const VMRegPair* regs_cc, + const GrowableArray* sig_cc_ro, + const VMRegPair* regs_cc_ro, + AdapterFingerPrint* fingerprint, + AdapterBlob*& new_adapter) { + + address i2c_entry = __ pc(); + gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); + + address c2i_unverified_entry = __ pc(); + Label skip_fixup; + + + gen_inline_cache_check(masm, skip_fixup); + + OopMapSet* oop_maps = new OopMapSet(); + int frame_complete = CodeOffsets::frame_never_safe; + int frame_size_in_words = 0; + + // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver) + address c2i_value_ro_entry = __ pc(); + if (regs_cc != regs_cc_ro) { + Label unused; + gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false); + skip_fixup = unused; } -#endif - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + // Scalarized c2i adapter + address c2i_entry = __ pc(); + + // Not implemented + // BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + // bs->c2i_entry_barrier(masm); + + gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, true); + + address c2i_unverified_value_entry = c2i_unverified_entry; + + // Non-scalarized c2i adapter + address c2i_value_entry = c2i_entry; + if (regs != regs_cc) { + Label value_entry_skip_fixup; + c2i_unverified_value_entry = __ pc(); + gen_inline_cache_check(masm, value_entry_skip_fixup); + + c2i_value_entry = __ pc(); + Label unused; + gen_c2i_adapter(masm, sig, regs, value_entry_skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false); + } __ flush(); - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); + + // The c2i adapter might safepoint and trigger a GC. 
The caller must make sure that + // the GC knows about the location of oop argument locations passed to the c2i adapter. + + bool caller_must_gc_arguments = (regs != regs_cc); + new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words + 10, oop_maps, caller_must_gc_arguments); + + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_value_entry, c2i_value_ro_entry, c2i_unverified_entry, c2i_unverified_value_entry); + } int SharedRuntime::c_calling_convention(const BasicType *sig_bt, @@ -845,6 +979,7 @@ // fall through case T_OBJECT: case T_ARRAY: + case T_VALUETYPE: case T_ADDRESS: case T_METADATA: if (int_args < Argument::n_int_register_parameters_c) { @@ -1721,6 +1856,7 @@ int_args++; break; } + case T_VALUETYPE: case T_OBJECT: assert(!is_critical_native, "no oop arguments"); object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], @@ -1902,6 +2038,7 @@ case T_LONG: return_type = 1; break; case T_ARRAY: + case T_VALUETYPE: case T_OBJECT: return_type = 1; break; case T_FLOAT: @@ -1934,6 +2071,7 @@ // Result is in v0 we'll save as needed break; case T_ARRAY: // Really a handle + case T_VALUETYPE: case T_OBJECT: // Really a handle break; // can't de-handlize until after safepoint check case T_VOID: break; @@ -2038,7 +2176,7 @@ __ reset_last_Java_frame(false); // Unbox oop result, e.g. JNIHandles::resolve result. - if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + if (ret_type == T_OBJECT || ret_type == T_ARRAY || ret_type == T_VALUETYPE) { __ resolve_jobject(r0, rthread, rscratch2); } @@ -3194,3 +3332,108 @@ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); } #endif // COMPILER2_OR_JVMCI + +BufferedValueTypeBlob* SharedRuntime::generate_buffered_value_type_adapter(const ValueKlass* vk) { + BufferBlob* buf = BufferBlob::create("value types pack/unpack", 16 * K); + CodeBuffer buffer(buf); + short buffer_locs[20]; + buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, + sizeof(buffer_locs)/sizeof(relocInfo)); + + MacroAssembler _masm(&buffer); + MacroAssembler* masm = &_masm; + + const Array* sig_vk = vk->extended_sig(); + const Array* regs = vk->return_regs(); + + int pack_fields_off = __ offset(); + + int j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_VALUETYPE) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + VMReg r_2 = pair.second(); + Address to(r0, off); + if (bt == T_FLOAT) { + __ strs(r_1->as_FloatRegister(), to); + } else if (bt == T_DOUBLE) { + __ strd(r_1->as_FloatRegister(), to); + } else if (bt == T_OBJECT || bt == T_ARRAY) { + Register val = r_1->as_Register(); + assert_different_registers(r0, val); + // We don't need barriers because the destination is a newly allocated object. + // Also, we cannot use store_heap_oop(to, val) because it uses r8 as tmp. 
+ if (UseCompressedOops) { + __ encode_heap_oop(val); + __ str(val, to); + } else { + __ str(val, to); + } + } else { + assert(is_java_primitive(bt), "unexpected basic type"); + assert_different_registers(r0, r_1->as_Register()); + size_t size_in_bytes = type2aelembytes(bt); + __ store_sized_value(to, r_1->as_Register(), size_in_bytes); + } + j++; + } + assert(j == regs->length(), "missed a field?"); + + __ ret(lr); + + int unpack_fields_off = __ offset(); + + j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_VALUETYPE) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + VMReg r_2 = pair.second(); + Address from(r0, off); + if (bt == T_FLOAT) { + __ ldrs(r_1->as_FloatRegister(), from); + } else if (bt == T_DOUBLE) { + __ ldrd(r_1->as_FloatRegister(), from); + } else if (bt == T_OBJECT || bt == T_ARRAY) { + assert_different_registers(r0, r_1->as_Register()); + __ load_heap_oop(r_1->as_Register(), from); + } else { + assert(is_java_primitive(bt), "unexpected basic type"); + assert_different_registers(r0, r_1->as_Register()); + + size_t size_in_bytes = type2aelembytes(bt); + __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); + } + j++; + } + assert(j == regs->length(), "missed a field?"); + + __ ret(lr); + + __ flush(); + + return BufferedValueTypeBlob::create(&buffer, pack_fields_off, unpack_fields_off); +} --- old/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp 2019-07-12 11:39:05.742325929 +0000 +++ new/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp 2019-07-12 11:39:04.618278721 +0000 @@ -320,7 +320,7 @@ return_address = __ pc(); // store result depending on type (everything that is not - // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + // T_OBJECT, T_VALUETYPE, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) // n.b. this assumes Java returns an integral result in r0 // and a floating result in j_farg0 __ ldr(j_rarg2, result); @@ -328,6 +328,8 @@ __ ldr(j_rarg1, result_type); __ cmp(j_rarg1, (u1)T_OBJECT); __ br(Assembler::EQ, is_long); + __ cmp(j_rarg1, (u1)T_VALUETYPE); + __ br(Assembler::EQ, is_long); __ cmp(j_rarg1, (u1)T_LONG); __ br(Assembler::EQ, is_long); __ cmp(j_rarg1, (u1)T_FLOAT); @@ -1829,7 +1831,7 @@ __ align(OptoLoopAlignment); __ BIND(L_store_element); - __ store_heap_oop(__ post(to, UseCompressedOops ? 4 : 8), copied_oop, noreg, noreg, AS_RAW); // store the oop + __ store_heap_oop(__ post(to, UseCompressedOops ? 4 : 8), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop __ sub(count, count, 1); __ cbz(count, L_do_card_marks); @@ -5650,6 +5652,184 @@ }; + // Call here from the interpreter or compiled code to either load + // multiple returned values from the value type instance being + // returned to registers or to store returned values to a newly + // allocated value type instance. + address generate_return_value_stub(address destination, const char* name, bool has_res) { + + // Information about frame layout at time of blocking runtime call. + // Note that we only have to preserve callee-saved registers since + // the compilers are responsible for supplying a continuation point + // if they expect all registers to be preserved. + // n.b. 
aarch64 asserts that frame::arg_reg_save_area_bytes == 0 + enum layout { + rfp_off = 0, rfp_off2, + + j_rarg7_off, j_rarg7_2, + j_rarg6_off, j_rarg6_2, + j_rarg5_off, j_rarg5_2, + j_rarg4_off, j_rarg4_2, + j_rarg3_off, j_rarg3_2, + j_rarg2_off, j_rarg2_2, + j_rarg1_off, j_rarg1_2, + j_rarg0_off, j_rarg0_2, + + j_farg0_off, j_farg0_2, + j_farg1_off, j_farg1_2, + j_farg2_off, j_farg2_2, + j_farg3_off, j_farg3_2, + j_farg4_off, j_farg4_2, + j_farg5_off, j_farg5_2, + j_farg6_off, j_farg6_2, + j_farg7_off, j_farg7_2, + + return_off, return_off2, + framesize // inclusive of return address + }; + + int insts_size = 512; + int locs_size = 64; + + CodeBuffer code(name, insts_size, locs_size); + OopMapSet* oop_maps = new OopMapSet(); + MacroAssembler* masm = new MacroAssembler(&code); + + address start = __ pc(); + + const Address f7_save (rfp, j_farg7_off * wordSize); + const Address f6_save (rfp, j_farg6_off * wordSize); + const Address f5_save (rfp, j_farg5_off * wordSize); + const Address f4_save (rfp, j_farg4_off * wordSize); + const Address f3_save (rfp, j_farg3_off * wordSize); + const Address f2_save (rfp, j_farg2_off * wordSize); + const Address f1_save (rfp, j_farg1_off * wordSize); + const Address f0_save (rfp, j_farg0_off * wordSize); + + const Address r0_save (rfp, j_rarg0_off * wordSize); + const Address r1_save (rfp, j_rarg1_off * wordSize); + const Address r2_save (rfp, j_rarg2_off * wordSize); + const Address r3_save (rfp, j_rarg3_off * wordSize); + const Address r4_save (rfp, j_rarg4_off * wordSize); + const Address r5_save (rfp, j_rarg5_off * wordSize); + const Address r6_save (rfp, j_rarg6_off * wordSize); + const Address r7_save (rfp, j_rarg7_off * wordSize); + + // Generate oop map + OopMap* map = new OopMap(framesize, 0); + + map->set_callee_saved(VMRegImpl::stack2reg(rfp_off), rfp->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg7_off), j_rarg7->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg6_off), j_rarg6->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg5_off), j_rarg5->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg4_off), j_rarg4->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg3_off), j_rarg3->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg2_off), j_rarg2->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg1_off), j_rarg1->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_rarg0_off), j_rarg0->as_VMReg()); + + map->set_callee_saved(VMRegImpl::stack2reg(j_farg0_off), j_farg0->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg1_off), j_farg1->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg2_off), j_farg2->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg3_off), j_farg3->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg4_off), j_farg4->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg5_off), j_farg5->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg6_off), j_farg6->as_VMReg()); + map->set_callee_saved(VMRegImpl::stack2reg(j_farg7_off), j_farg7->as_VMReg()); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of + // thread-local storage and also sets up last_Java_sp slightly + // differently than the real call_VM + + __ enter(); // Save FP and LR before call + + assert(is_even(framesize/2), "sp not 16-byte aligned"); + + // lr and fp are already in place + __ sub(sp, rfp, ((unsigned)framesize - 4) 
<< LogBytesPerInt); // prolog + + __ strd(j_farg7, f7_save); + __ strd(j_farg6, f6_save); + __ strd(j_farg5, f5_save); + __ strd(j_farg4, f4_save); + __ strd(j_farg3, f3_save); + __ strd(j_farg2, f2_save); + __ strd(j_farg1, f1_save); + __ strd(j_farg0, f0_save); + + __ str(j_rarg0, r0_save); + __ str(j_rarg1, r1_save); + __ str(j_rarg2, r2_save); + __ str(j_rarg3, r3_save); + __ str(j_rarg4, r4_save); + __ str(j_rarg5, r5_save); + __ str(j_rarg6, r6_save); + __ str(j_rarg7, r7_save); + + int frame_complete = __ pc() - start; + + // Set up last_Java_sp and last_Java_fp + address the_pc = __ pc(); + __ set_last_Java_frame(sp, rfp, the_pc, rscratch1); + + // Call runtime + __ mov(c_rarg0, rthread); + __ mov(c_rarg1, r0); + + BLOCK_COMMENT("call runtime_entry"); + __ mov(rscratch1, destination); + __ blrt(rscratch1, 2 /* number_of_arguments */, 0, 1); + + oop_maps->add_gc_map(the_pc - start, map); + + __ reset_last_Java_frame(false); + __ maybe_isb(); + + __ ldrd(j_farg7, f7_save); + __ ldrd(j_farg6, f6_save); + __ ldrd(j_farg5, f5_save); + __ ldrd(j_farg4, f4_save); + __ ldrd(j_farg3, f3_save); + __ ldrd(j_farg3, f2_save); + __ ldrd(j_farg1, f1_save); + __ ldrd(j_farg0, f0_save); + + __ ldr(j_rarg0, r0_save); + __ ldr(j_rarg1, r1_save); + __ ldr(j_rarg2, r2_save); + __ ldr(j_rarg3, r3_save); + __ ldr(j_rarg4, r4_save); + __ ldr(j_rarg5, r5_save); + __ ldr(j_rarg6, r6_save); + __ ldr(j_rarg7, r7_save); + + __ leave(); + + // check for pending exceptions + Label pending; + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cmp(rscratch1, (u1)NULL_WORD); + __ br(Assembler::NE, pending); + + if (has_res) { + __ get_vm_result(r0, rthread); + } + __ ret(lr); + + __ bind(pending); + __ ldr(r0, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + + // codeBlob framesize is in words (not VMRegImpl::slot_size) + int frame_size_in_words = (framesize >> (LogBytesPerWord - LogBytesPerInt)); + RuntimeStub* stub = + RuntimeStub::new_runtime_stub(name, &code, frame_complete, frame_size_in_words, oop_maps, false); + + return stub->entry_point(); + } + // Initialization void generate_initial() { // Generate initial stubs and initializes the entry points @@ -5699,6 +5879,12 @@ if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true); } + + + StubRoutines::_load_value_type_fields_in_regs = + generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::load_value_type_fields_in_regs), "load_value_type_fields_in_regs", false); + StubRoutines::_store_value_type_fields_to_buf = + generate_return_value_stub(CAST_FROM_FN_PTR(address, SharedRuntime::store_value_type_fields_to_buf), "store_value_type_fields_to_buf", true); } void generate_all() { --- old/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp 2019-07-12 11:39:08.238430763 +0000 +++ new/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp 2019-07-12 11:39:07.082382210 +0000 @@ -38,6 +38,7 @@ #include "oops/methodData.hpp" #include "oops/method.hpp" #include "oops/oop.inline.hpp" +#include "oops/valueKlass.hpp" #include "prims/jvmtiExport.hpp" #include "prims/jvmtiThreadState.hpp" #include "runtime/arguments.hpp" @@ -440,6 +441,7 @@ __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); // and NULL it as marker that esp is now tos until next java call __ str(zr, Address(rfp, 
frame::interpreter_frame_last_sp_offset * wordSize)); + __ restore_bcp(); __ restore_locals(); __ restore_constant_pool_cache(); @@ -566,6 +568,7 @@ case T_VOID : /* nothing to do */ break; case T_FLOAT : /* nothing to do */ break; case T_DOUBLE : /* nothing to do */ break; + case T_VALUETYPE: // fall through (value types are handled with oops) case T_OBJECT : // retrieve result from frame __ ldr(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); --- old/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp 2019-07-12 11:39:10.550527870 +0000 +++ new/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp 2019-07-12 11:39:09.406479821 +0000 @@ -147,7 +147,7 @@ Register val, DecoratorSet decorators) { assert(val == noreg || val == r0, "parameter is just for looks"); - __ store_heap_oop(dst, val, r10, r1, decorators); + __ store_heap_oop(dst, val, r10, r1, noreg, decorators); } static void do_oop_load(InterpreterMacroAssembler* _masm, @@ -170,6 +170,7 @@ Label L_patch_done; switch (bc) { + case Bytecodes::_fast_qputfield: case Bytecodes::_fast_aputfield: case Bytecodes::_fast_bputfield: case Bytecodes::_fast_zputfield: @@ -745,10 +746,10 @@ } Label ok; __ br(Assembler::LO, ok); - // ??? convention: move array into r3 for exception message - __ mov(r3, array); - __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); - __ br(rscratch1); + // ??? convention: move array into r3 for exception message + __ mov(r3, array); + __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ br(rscratch1); __ bind(ok); } @@ -808,11 +809,21 @@ // r0: array // r1: index index_check(r0, r1); // leaves index in r1, kills rscratch1 - __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); - do_oop_load(_masm, - Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), - r0, - IS_ARRAY); + if (ValueArrayFlatten) { + Label is_flat_array, done; + + __ test_flattened_array_oop(r0, r8 /*temp*/, is_flat_array); + __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY); + + __ b(done); + __ bind(is_flat_array); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::value_array_load), r0, r1); + __ bind(done); + } else { + __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY); + } } void TemplateTable::baload() @@ -1102,37 +1113,47 @@ Label is_null, ok_is_subtype, done; transition(vtos, vtos); // stack: ..., array, index, value - __ ldr(r0, at_tos()); // value + __ ldr(r0, at_tos()); // value __ ldr(r2, at_tos_p1()); // index __ ldr(r3, at_tos_p2()); // array Address element_address(r3, r4, Address::uxtw(LogBytesPerHeapOop)); index_check(r3, r2); // kills r1 - __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); + + // DMS CHECK: what does line below do? 
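As far as I can tell, the add that the comment below asks about pre-biases the index: r4 = index + (array header size expressed in heap-oop-sized units), so that the later Address(r3, r4, uxtw(LogBytesPerHeapOop)) resolves to array_base + header + index * oop_size. A small arithmetic check of that trick, assuming a 16-byte object-array header and 4-byte compressed oops (both are assumptions for illustration):

#include <cassert>

int main() {
  const int base_offset_in_bytes = 16;  // assumed arrayOopDesc::base_offset_in_bytes(T_OBJECT)
  const int LogBytesPerHeapOop   = 2;   // assumed: 4-byte compressed oops

  for (long index = 0; index < 8; index++) {
    // r4 = index + (base_offset >> LogBytesPerHeapOop)
    long biased = index + (base_offset_in_bytes >> LogBytesPerHeapOop);
    // Address(r3, r4, uxtw(LogBytesPerHeapOop)) == r3 + (biased << LogBytesPerHeapOop)
    long offset = biased << LogBytesPerHeapOop;
    assert(offset == base_offset_in_bytes + index * (1 << LogBytesPerHeapOop));
  }
  return 0;
}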
+ __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); // do array store check - check for NULL value first __ cbz(r0, is_null); + Label is_flat_array; + if (ValueArrayFlatten) { + __ test_flattened_array_oop(r3, r8 /*temp*/, is_flat_array); + } + // Move subklass into r1 __ load_klass(r1, r0); + // Move superklass into r0 __ load_klass(r0, r3); - __ ldr(r0, Address(r0, - ObjArrayKlass::element_klass_offset())); + __ ldr(r0, Address(r0, ObjArrayKlass::element_klass_offset())); // Compress array + index*oopSize + 12 into a single register. Frees r2. // Generate subtype check. Blows r2, r5 // Superklass in r0. Subklass in r1. + __ gen_subtype_check(r1, ok_is_subtype); // Come here on failure // object is at TOS __ b(Interpreter::_throw_ArrayStoreException_entry); + // Come here on success __ bind(ok_is_subtype); + // Get the value we will store __ ldr(r0, at_tos()); // Now store using the appropriate barrier @@ -1143,8 +1164,61 @@ __ bind(is_null); __ profile_null_seen(r2); + if (EnableValhalla) { + Label is_null_into_value_array_npe, store_null; + + // No way to store null in flat array + __ test_null_free_array_oop(r3, r8, is_null_into_value_array_npe); + __ b(store_null); + + __ bind(is_null_into_value_array_npe); + __ b(ExternalAddress(Interpreter::_throw_NullPointerException_entry)); + + __ bind(store_null); + } + // Store a NULL do_oop_store(_masm, element_address, noreg, IS_ARRAY); + __ b(done); + + if (EnableValhalla) { + Label is_type_ok; + + // store non-null value + __ bind(is_flat_array); + + // Simplistic type check... + // r0 - value, r2 - index, r3 - array. + + // Profile the not-null value's klass. + // Load value class + __ load_klass(r1, r0); + __ profile_typecheck(r2, r1, r0); // blows r2, and r0 + + // flat value array needs exact type match + // is "r8 == r0" (value subclass == array element superclass) + + // Move element klass into r0 + + __ load_klass(r0, r3); + + __ ldr(r0, Address(r0, ArrayKlass::element_klass_offset())); + __ cmp(r0, r1); + __ br(Assembler::EQ, is_type_ok); + + __ profile_typecheck_failed(r2); + __ b(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry)); + + __ bind(is_type_ok); + + // DMS CHECK: Reload from TOS to be safe, because of profile_typecheck that blows r2 and r0. + // Should we really do it? + __ ldr(r1, at_tos()); // value + __ mov(r2, r3); // array, ldr(r2, at_tos_p2()); + __ ldr(r3, at_tos_p1()); // index + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::value_array_store), r1, r2, r3); + } + // Pop stack arguments __ bind(done); @@ -2021,19 +2095,86 @@ __ profile_not_taken_branch(r0); } -void TemplateTable::if_acmp(Condition cc) -{ +void TemplateTable::if_acmp(Condition cc) { transition(atos, vtos); // assume branch is more often taken than not (loops use backward branches) - Label not_taken; + Label taken, not_taken; __ pop_ptr(r1); + + Register is_value_mask = rscratch1; + __ mov(is_value_mask, markOopDesc::always_locked_pattern); + + if (EnableValhalla && ACmpOnValues == 3) { + __ cmp(r1, r0); + __ br(Assembler::EQ, (cc == equal) ? taken : not_taken); + + // might be substitutable, test if either r0 or r1 is null + __ andr(r2, r0, r1); + __ cbz(r2, (cc == equal) ? not_taken : taken); + + // and both are values ? 
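The mark-word checks that follow implement "are both operands inline/value objects": each operand's mark word is ANDed with always_locked_pattern and execution only continues on the substitutability path when both retain the full pattern. The standalone model below mirrors that AND/AND/AND/compare sequence; the pattern value is made up for illustration and is not the real markOopDesc constant.

#include <cassert>
#include <cstdint>

// Assumed stand-in for markOopDesc::always_locked_pattern.
constexpr uint64_t kAlwaysLockedPattern = 0x405;

bool both_are_values(uint64_t mark_a, uint64_t mark_b) {
  // Mirrors: andr(r2, markA, mask); andr(r4, markB, mask); andr(r2, r2, r4); cmp(r2, mask)
  uint64_t a = mark_a & kAlwaysLockedPattern;
  uint64_t b = mark_b & kAlwaysLockedPattern;
  return (a & b) == kAlwaysLockedPattern;  // true only if both marks carry the full pattern
}

int main() {
  assert( both_are_values(kAlwaysLockedPattern | 0x1000, kAlwaysLockedPattern));
  assert(!both_are_values(kAlwaysLockedPattern, 0x1));  // second operand is not a value object
  return 0;
}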
+ __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes())); + __ andr(r2, r2, is_value_mask); + __ ldr(r4, Address(r0, oopDesc::mark_offset_in_bytes())); + __ andr(r4, r4, is_value_mask); + __ andr(r2, r2, r4); + __ cmp(r2, is_value_mask); + __ br(Assembler::NE, (cc == equal) ? not_taken : taken); + + // same value klass ? + __ load_metadata(r2, r1); + __ load_metadata(r4, r0); + __ cmp(r2, r4); + __ br(Assembler::NE, (cc == equal) ? not_taken : taken); + + // Know both are the same type, let's test for substitutability... + if (cc == equal) { + invoke_is_substitutable(r0, r1, taken, not_taken); + } else { + invoke_is_substitutable(r0, r1, not_taken, taken); + } + __ stop("Not reachable"); + } + + if (EnableValhalla && ACmpOnValues == 1) { + Label is_null; + __ cbz(r1, is_null); + __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes())); + __ andr(r2, r2, is_value_mask); + __ cmp(r2, is_value_mask); + __ cset(r2, Assembler::EQ); + __ orr(r1, r1, r2); + __ bind(is_null); + } + __ cmpoop(r1, r0); + + if (EnableValhalla && ACmpOnValues == 2) { + __ br(Assembler::NE, (cc == not_equal) ? taken : not_taken); + __ cbz(r1, (cc == equal) ? taken : not_taken); + __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes())); + __ andr(r2, r2, is_value_mask); + __ cmp(r2, is_value_mask); + cc = (cc == equal) ? not_equal : equal; + } + __ br(j_not(cc), not_taken); + __ bind(taken); branch(false, false); __ bind(not_taken); __ profile_not_taken_branch(r0); } +void TemplateTable::invoke_is_substitutable(Register aobj, Register bobj, + Label& is_subst, Label& not_subst) { + + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::is_substitutable), aobj, bobj); + // Restored... r0 answer, jmp to outcome... + __ cbz(r0, not_subst); + __ b(is_subst); +} + + void TemplateTable::ret() { transition(vtos, vtos); // We might be moving to a safepoint. The thread which calls @@ -2283,7 +2424,7 @@ __ narrow(r0); } - __ remove_activation(state); + __ remove_activation(state); __ ret(lr); } @@ -2497,8 +2638,7 @@ // x86 uses a shift and mask or wings it with a shift plus assert // the mask is not needed. 
aarch64 just uses bitfield extract - __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift, - ConstantPoolCacheEntry::tos_state_bits); + __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits); assert(btos == 0, "change code, btos != 0"); __ cbnz(flags, notByte); @@ -2533,12 +2673,68 @@ __ cmp(flags, (u1)atos); __ br(Assembler::NE, notObj); // atos - do_oop_load(_masm, field, r0, IN_HEAP); - __ push(atos); - if (rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + if (!EnableValhalla) { + do_oop_load(_masm, field, r0, IN_HEAP); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + } + __ b(Done); + } else { // Valhalla + + if (is_static) { + __ load_heap_oop(r0, field); + Label isFlattenable, isUninitialized; + // Issue below if the static field has not been initialized yet + __ test_field_is_flattenable(raw_flags, r8 /*temp*/, isFlattenable); + // Not flattenable case + __ push(atos); + __ b(Done); + // Flattenable case, must not return null even if uninitialized + __ bind(isFlattenable); + __ cbz(r0, isUninitialized); + __ push(atos); + __ b(Done); + __ bind(isUninitialized); + __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_static_value_field), obj, raw_flags); + __ verify_oop(r0); + __ push(atos); + __ b(Done); + } else { + Label isFlattened, isInitialized, isFlattenable, rewriteFlattenable; + __ test_field_is_flattenable(raw_flags, r8 /*temp*/, isFlattenable); + // Non-flattenable field case, also covers the object case + __ load_heap_oop(r0, field); + __ push(atos); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, r1); + } + __ b(Done); + __ bind(isFlattenable); + __ test_field_is_flattened(raw_flags, r8 /* temp */, isFlattened); + // Non-flattened field case + __ load_heap_oop(r0, field); + __ cbnz(r0, isInitialized); + __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field), obj, raw_flags); + __ bind(isInitialized); + __ verify_oop(r0); + __ push(atos); + __ b(rewriteFlattenable); + __ bind(isFlattened); + __ ldr(r10, Address(cache, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f1_offset()))); + __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flattened_field), obj, raw_flags, r10); + __ verify_oop(r0); + __ push(atos); + __ bind(rewriteFlattenable); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_qgetfield, bc, r1); + } + __ b(Done); + } } - __ b(Done); __ bind(notObj); __ cmp(flags, (u1)itos); @@ -2708,6 +2904,7 @@ const Register obj = r2; const Register off = r19; const Register flags = r0; + const Register flags2 = r6; const Register bc = r4; resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); @@ -2730,6 +2927,8 @@ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; + __ mov(flags2, flags); + // x86 uses a shift and mask or wings it with a shift plus assert // the mask is not needed. 
aarch64 just uses bitfield extract __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits); @@ -2772,14 +2971,56 @@ // atos { - __ pop(atos); - if (!is_static) pop_and_check_object(obj); - // Store into the field - do_oop_store(_masm, field, r0, IN_HEAP); - if (rc == may_rewrite) { - patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no); - } - __ b(Done); + if (!EnableValhalla) { + __ pop(atos); + if (!is_static) pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, r0, IN_HEAP); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no); + } + __ b(Done); + } else { // Valhalla + + __ pop(atos); + if (is_static) { + Label notFlattenable; + __ test_field_is_not_flattenable(flags2, r8 /* temp */, notFlattenable); + __ null_check(r0); + __ bind(notFlattenable); + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(Done); + } else { + Label isFlattenable, isFlattened, notBuffered, notBuffered2, rewriteNotFlattenable, rewriteFlattenable; + __ test_field_is_flattenable(flags2, r8 /*temp*/, isFlattenable); + // Not flattenable case, covers not flattenable values and objects + pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, r0, IN_HEAP); + __ bind(rewriteNotFlattenable); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, r19, true, byte_no); + } + __ b(Done); + // Implementation of the flattenable semantic + __ bind(isFlattenable); + __ null_check(r0); + __ test_field_is_flattened(flags2, r8 /*temp*/, isFlattened); + // Not flattened case + pop_and_check_object(obj); + // Store into the field + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(rewriteFlattenable); + __ bind(isFlattened); + pop_and_check_object(obj); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flattened_value), r0, off, obj); + __ bind(rewriteFlattenable); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_qputfield, bc, r19, true, byte_no); + } + __ b(Done); + } + } // Valhalla } __ bind(notObj); @@ -2919,6 +3160,7 @@ // to do it for every data type, we use the saved values as the // jvalue object. 
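For reference, the instance-field branch structure in the Valhalla putfield code above reduces to a three-way choice. The sketch below only models that control flow under the stated simplification; it does not reproduce the actual stores, null checks on static fields, or bytecode rewriting.

#include <cassert>

enum class PutfieldPath {
  oop_store,               // plain barriered oop store (field not flattenable)
  oop_store_null_checked,  // flattenable but not flattened: null check, then oop store
  write_flattened_value    // flattened: runtime call InterpreterRuntime::write_flattened_value
};

// Rough model of the instance-field dispatch in the atos putfield case above.
PutfieldPath select_path(bool is_flattenable, bool is_flattened) {
  if (!is_flattenable) return PutfieldPath::oop_store;
  if (!is_flattened)   return PutfieldPath::oop_store_null_checked;
  return PutfieldPath::write_flattened_value;
}

int main() {
  assert(select_path(false, false) == PutfieldPath::oop_store);
  assert(select_path(true,  false) == PutfieldPath::oop_store_null_checked);
  assert(select_path(true,  true ) == PutfieldPath::write_flattened_value);
  return 0;
}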
switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_qputfield: //fall through case Bytecodes::_fast_aputfield: __ push_ptr(r0); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -2945,6 +3187,7 @@ r19, c_rarg2, c_rarg3); switch (bytecode()) { // restore tos values + case Bytecodes::_fast_qputfield: //fall through case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break; case Bytecodes::_fast_bputfield: // fall through case Bytecodes::_fast_zputfield: // fall through @@ -2995,6 +3238,19 @@ // access field switch (bytecode()) { + case Bytecodes::_fast_qputfield: //fall through + { + Label isFlattened, done; + __ null_check(r0); + __ test_field_is_flattened(r3, r8 /* temp */, isFlattened); + // No Flattened case + do_oop_store(_masm, field, r0, IN_HEAP); + __ b(done); + __ bind(isFlattened); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flattened_value), r0, r1, r2); + __ bind(done); + } + break; case Bytecodes::_fast_aputfield: do_oop_store(_masm, field, r0, IN_HEAP); break; @@ -3088,6 +3344,32 @@ // access field switch (bytecode()) { + case Bytecodes::_fast_qgetfield: + { + Label isFlattened, isInitialized, Done; + // DMS CHECK: We don't need to reload multiple times, but stay close to original code + __ ldrw(r9, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()))); + __ test_field_is_flattened(r9, r8 /* temp */, isFlattened); + // Non-flattened field case + __ mov(r9, r0); + __ load_heap_oop(r0, field); + __ cbnz(r0, isInitialized); + __ mov(r0, r9); + __ ldrw(r9, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()))); + __ andw(r9, r9, ConstantPoolCacheEntry::field_index_mask); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field), r0, r9); + __ bind(isInitialized); + __ verify_oop(r0); + __ b(Done); + __ bind(isFlattened); + __ ldrw(r9, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()))); + __ andw(r9, r9, ConstantPoolCacheEntry::field_index_mask); + __ ldr(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f1_offset()))); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flattened_field), r0, r9, r3); + __ verify_oop(r0); + __ bind(Done); + } + break; case Bytecodes::_fast_agetfield: do_oop_load(_masm, field, r0, IN_HEAP); __ verify_oop(r0); @@ -3644,6 +3926,30 @@ __ membar(Assembler::StoreStore); } +void TemplateTable::defaultvalue() { + transition(vtos, atos); + __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); + __ get_constant_pool(c_rarg1); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::defaultvalue), + c_rarg1, c_rarg2); + __ verify_oop(r0); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(Assembler::StoreStore); +} + +void TemplateTable::withfield() { + transition(vtos, atos); + resolve_cache_and_index(f2_byte, c_rarg1 /*cache*/, c_rarg2 /*index*/, sizeof(u2)); + + // n.b. 
unlike x86 cache is now rcpool plus the indexed offset + // so using rcpool to meet shared code expectations + + call_VM(r1, CAST_FROM_FN_PTR(address, InterpreterRuntime::withfield), rcpool); + __ verify_oop(r1); + __ add(esp, esp, r0); + __ mov(r0, r1); +} + void TemplateTable::newarray() { transition(itos, atos); __ load_unsigned_byte(c_rarg1, at_bcp(1)); @@ -3715,14 +4021,29 @@ __ bind(ok_is_subtype); __ mov(r0, r3); // Restore object in r3 + __ b(done); + __ bind(is_null); + // Collect counts on whether this test sees NULLs a lot or not. if (ProfileInterpreter) { - __ b(done); - __ bind(is_null); __ profile_null_seen(r2); - } else { - __ bind(is_null); // same as 'done' } + + if (EnableValhalla) { + // Get cpool & tags index + __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array + __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index + // See if bytecode has already been quicked + __ add(rscratch1, r3, Array::base_offset_in_bytes()); + __ lea(r1, Address(rscratch1, r19)); + __ ldarb(r1, r1); + // See if CP entry is a Q-descriptor + __ andr (r1, r1, JVM_CONSTANT_QDescBit); + __ cmp(r1, (u1) JVM_CONSTANT_QDescBit); + __ br(Assembler::NE, done); + __ b(ExternalAddress(Interpreter::_throw_NullPointerException_entry)); + } + __ bind(done); } --- old/src/hotspot/cpu/aarch64/templateTable_aarch64.hpp 2019-07-12 11:39:13.026631867 +0000 +++ new/src/hotspot/cpu/aarch64/templateTable_aarch64.hpp 2019-07-12 11:39:11.898584489 +0000 @@ -39,4 +39,6 @@ static void index_check(Register array, Register index); static void index_check_without_pop(Register array, Register index); + static void invoke_is_substitutable(Register aobj, Register bobj, Label& is_subst, Label& not_subst); + #endif // CPU_AARCH64_TEMPLATETABLE_AARCH64_HPP --- old/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp 2019-07-12 11:39:15.154721247 +0000 +++ new/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp 2019-07-12 11:39:14.054675045 +0000 @@ -47,10 +47,10 @@ extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); #endif -VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { +VtableStub* VtableStubs::create_vtable_stub(int vtable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(true); - VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index, caller_is_c1); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; @@ -63,6 +63,10 @@ int slop_bytes = 0; int slop_delta = 0; +// No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. + const int index_dependent_slop = 0; + ByteSize entry_offset = caller_is_c1 ? 
Method::from_compiled_value_offset() : Method::from_compiled_value_ro_offset(); + ResourceMark rm; CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); @@ -116,7 +120,7 @@ if (DebugVtables) { Label L; __ cbz(rmethod, L); - __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ ldr(rscratch1, Address(rmethod, entry_offset)); __ cbnz(rscratch1, L); __ stop("Vtable entry is NULL"); __ bind(L); @@ -127,20 +131,21 @@ // rmethod: Method* // r2: receiver address ame_addr = __ pc(); - __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ ldr(rscratch1, Address(rmethod, entry_offset)); __ br(rscratch1); masm->flush(); - bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); + slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); return s; } -VtableStub* VtableStubs::create_itable_stub(int itable_index) { +VtableStub* VtableStubs::create_itable_stub(int itable_index, bool caller_is_c1) { // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. const int stub_code_length = code_size_limit(false); - VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index, caller_is_c1); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; @@ -152,6 +157,10 @@ int slop_bytes = 0; int slop_delta = 0; + const int index_dependent_slop = (itable_index == 0) ? 4 : // code size change with transition from 8-bit to 32-bit constant (@index == 16). + (itable_index < 16) ? 3 : 0; // index == 0 generates even shorter code. + ByteSize entry_offset = caller_is_c1 ? 
--- old/src/hotspot/share/runtime/arguments.cpp	2019-07-12 11:39:17.238808781 +0000
+++ new/src/hotspot/share/runtime/arguments.cpp	2019-07-12 11:39:16.190764762 +0000
@@ -2111,12 +2111,12 @@
 
   status = status && GCArguments::check_args_consistency();
 
-  if (LP64_ONLY(false &&) !FLAG_IS_DEFAULT(ValueTypePassFieldsAsArgs)) {
+  if (AMD64_ONLY(false &&) !FLAG_IS_DEFAULT(ValueTypePassFieldsAsArgs)) {
     FLAG_SET_CMDLINE(ValueTypePassFieldsAsArgs, false);
     warning("ValueTypePassFieldsAsArgs is not supported on this platform");
   }
 
-  if (LP64_ONLY(false &&) !FLAG_IS_DEFAULT(ValueTypeReturnedAsFields)) {
+  if (AMD64_ONLY(false &&) !FLAG_IS_DEFAULT(ValueTypeReturnedAsFields)) {
     FLAG_SET_CMDLINE(ValueTypeReturnedAsFields, false);
     warning("ValueTypeReturnedAsFields is not supported on this platform");
   }
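
On the flag side, switching the guard from LP64_ONLY to AMD64_ONLY means the two ValueType* options are now left untouched only on x86-64; everywhere else, AArch64 included, a non-default setting is reset with a warning. After macro expansion the guard reads roughly as follows (a sketch; AMD64_ONLY is the usual macro from utilities/macros.hpp, which keeps its argument on AMD64 and drops it elsewhere):

    #ifdef AMD64
      // Expands to "false && ...": the branch is never taken, so a command-line
      // ValueTypePassFieldsAsArgs setting is left alone.
      if (false && !FLAG_IS_DEFAULT(ValueTypePassFieldsAsArgs)) { /* unreachable */ }
    #else
      // On every other platform a non-default setting is reset.
      if (!FLAG_IS_DEFAULT(ValueTypePassFieldsAsArgs)) {
        FLAG_SET_CMDLINE(ValueTypePassFieldsAsArgs, false);
        warning("ValueTypePassFieldsAsArgs is not supported on this platform");
      }
    #endif
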
--- old/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestArrays.java	2019-07-12 11:39:19.494903539 +0000
+++ new/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestArrays.java	2019-07-12 11:39:18.394857336 +0000
@@ -32,7 +32,7 @@
  * @test
  * @summary Test value type arrays
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestArrays.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- old/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConvention.java	2019-07-12 11:39:21.734997627 +0000
+++ new/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConvention.java	2019-07-12 11:39:20.614950583 +0000
@@ -32,7 +32,7 @@
  * @test
  * @summary Test value type calling convention optimizations
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestCallingConvention.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- old/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConventionC1.java	2019-07-12 11:39:23.867087179 +0000
+++ new/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestCallingConventionC1.java	2019-07-12 11:39:22.763040806 +0000
@@ -30,7 +30,7 @@
  * @test
  * @summary Test calls from {C1} to {C2, Interpreter}, and vice versa.
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestCallingConventionC1.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- old/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestIntrinsics.java	2019-07-12 11:39:26.139182612 +0000
+++ new/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestIntrinsics.java	2019-07-12 11:39:25.015135399 +0000
@@ -36,7 +36,7 @@
  * @summary Test intrinsic support for value types
  * @library /testlibrary /test/lib /compiler/whitebox /
  * @modules java.base/jdk.internal.misc
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestIntrinsics.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- old/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestJNICalls.java	2019-07-12 11:39:28.147266957 +0000
+++ new/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestJNICalls.java	2019-07-12 11:39:27.127224112 +0000
@@ -31,7 +31,7 @@
  * @test
  * @summary Test calling native methods with value type arguments from compiled code.
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestJNICalls.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=120 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- old/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestLWorld.java	2019-07-12 11:39:30.111349455 +0000
+++ new/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestLWorld.java	2019-07-12 11:39:29.079306105 +0000
@@ -35,7 +35,7 @@
  * @summary Test value types in LWorld.
  * @modules java.base/jdk.experimental.value
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestLWorld.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- old/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestNullableArrays.java	2019-07-12 11:39:32.247439178 +0000
+++ new/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestNullableArrays.java	2019-07-12 11:39:31.191394820 +0000
@@ -31,7 +31,7 @@
  * @test
  * @summary Test nullable value type arrays
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestNullableArrays.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- old/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestNullableValueTypes.java	2019-07-12 11:39:34.379528734 +0000
+++ new/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestNullableValueTypes.java	2019-07-12 11:39:33.295483199 +0000
@@ -32,7 +32,7 @@
  * @test
  * @summary Test correct handling of nullable value types.
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestNullableValueTypes.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
--- old/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestOnStackReplacement.java	2019-07-12 11:39:36.583621315 +0000
+++ new/test/hotspot/jtreg/compiler/valhalla/valuetypes/TestOnStackReplacement.java	2019-07-12 11:39:35.467574436 +0000
@@ -30,7 +30,7 @@
  * @test
  * @summary Test on stack replacement (OSR) with value types
  * @library /testlibrary /test/lib /compiler/whitebox /
- * @requires os.simpleArch == "x64"
+ * @requires (os.simpleArch == "x64" | os.simpleArch == "aarch64")
  * @compile TestOnStackReplacement.java
  * @run driver ClassFileInstaller sun.hotspot.WhiteBox jdk.test.lib.Platform
  * @run main/othervm/timeout=300 -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions