--- old/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	2019-07-12 11:39:10.550527870 +0000
+++ new/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	2019-07-12 11:39:09.406479821 +0000
@@ -147,7 +147,7 @@
                          Register val,
                          DecoratorSet decorators) {
   assert(val == noreg || val == r0, "parameter is just for looks");
-  __ store_heap_oop(dst, val, r10, r1, decorators);
+  __ store_heap_oop(dst, val, r10, r1, noreg, decorators);
 }
 
 static void do_oop_load(InterpreterMacroAssembler* _masm,
@@ -170,6 +170,7 @@
   Label L_patch_done;
 
   switch (bc) {
+  case Bytecodes::_fast_qputfield:
   case Bytecodes::_fast_aputfield:
   case Bytecodes::_fast_bputfield:
   case Bytecodes::_fast_zputfield:
@@ -745,10 +746,10 @@
   }
   Label ok;
   __ br(Assembler::LO, ok);
-    // ??? convention: move array into r3 for exception message
-    __ mov(r3, array);
-    __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
-    __ br(rscratch1);
+  // ??? convention: move array into r3 for exception message
+  __ mov(r3, array);
+  __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
+  __ br(rscratch1);
   __ bind(ok);
 }
 
@@ -808,11 +809,21 @@
   // r0: array
   // r1: index
   index_check(r0, r1); // leaves index in r1, kills rscratch1
-  __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
-  do_oop_load(_masm,
-              Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)),
-              r0,
-              IS_ARRAY);
+  if (ValueArrayFlatten) {
+    Label is_flat_array, done;
+
+    __ test_flattened_array_oop(r0, r8 /*temp*/, is_flat_array);
+    __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
+    do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY);
+
+    __ b(done);
+    __ bind(is_flat_array);
+    __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::value_array_load), r0, r1);
+    __ bind(done);
+  } else {
+    __ add(r1, r1, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
+    do_oop_load(_masm, Address(r0, r1, Address::uxtw(LogBytesPerHeapOop)), r0, IS_ARRAY);
+  }
 }
 
 void TemplateTable::baload()
@@ -1102,37 +1113,47 @@
   Label is_null, ok_is_subtype, done;
   transition(vtos, vtos);
   // stack: ..., array, index, value
-  __ ldr(r0, at_tos());    // value
+  __ ldr(r0, at_tos()); // value
   __ ldr(r2, at_tos_p1()); // index
   __ ldr(r3, at_tos_p2()); // array
 
   Address element_address(r3, r4, Address::uxtw(LogBytesPerHeapOop));
 
   index_check(r3, r2);     // kills r1
+
+  // DMS CHECK: what does the line below do?
+  // (It forms the element offset in r4: index plus the array base offset, in
+  //  heap-oop units; element_address then scales it by uxtw(LogBytesPerHeapOop).)
-  __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
+  __ add(r4, r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
 
   // do array store check - check for NULL value first
   __ cbz(r0, is_null);
 
+  Label is_flat_array;
+  if (ValueArrayFlatten) {
+    __ test_flattened_array_oop(r3, r8 /*temp*/, is_flat_array);
+  }
+
   // Move subklass into r1
   __ load_klass(r1, r0);
+
   // Move superklass into r0
   __ load_klass(r0, r3);
-  __ ldr(r0, Address(r0,
-                     ObjArrayKlass::element_klass_offset()));
+  __ ldr(r0, Address(r0, ObjArrayKlass::element_klass_offset()));
 
   // Compress array + index*oopSize + 12 into a single register.  Frees r2.
 
   // Generate subtype check.  Blows r2, r5
   // Superklass in r0.  Subklass in r1.
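+  //
+  // Reviewer note: gen_subtype_check expects the superklass in r0 and the
+  // subklass in r1 (both loaded just above); it branches to ok_is_subtype on
+  // success and falls through on failure, which is why the unconditional
+  // branch to _throw_ArrayStoreException_entry directly follows the call.
+  //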
+  __ gen_subtype_check(r1, ok_is_subtype);
 
   // Come here on failure
   // object is at TOS
   __ b(Interpreter::_throw_ArrayStoreException_entry);
+
   // Come here on success
   __ bind(ok_is_subtype);
+
   // Get the value we will store
   __ ldr(r0, at_tos());
   // Now store using the appropriate barrier
@@ -1143,8 +1164,61 @@
   __ bind(is_null);
   __ profile_null_seen(r2);
 
+  if (EnableValhalla) {
+    Label is_null_into_value_array_npe, store_null;
+
+    // No way to store null into a null-free (flat) value array
+    __ test_null_free_array_oop(r3, r8, is_null_into_value_array_npe);
+    __ b(store_null);
+
+    __ bind(is_null_into_value_array_npe);
+    __ b(ExternalAddress(Interpreter::_throw_NullPointerException_entry));
+
+    __ bind(store_null);
+  }
+
   // Store a NULL
   do_oop_store(_masm, element_address, noreg, IS_ARRAY);
+  __ b(done);
+
+  if (EnableValhalla) {
+    Label is_type_ok;
+
+    // store non-null value
+    __ bind(is_flat_array);
+
+    // Simplistic type check...
+    // r0 - value, r2 - index, r3 - array.
+
+    // Profile the not-null value's klass.
+    // Load value class
+    __ load_klass(r1, r0);
+    __ profile_typecheck(r2, r1, r0); // blows r2 and r0
+
+    // A flat value array needs an exact type match:
+    // "r0 == r1" (array element klass == value klass)
+
+    // Move element klass into r0
+    __ load_klass(r0, r3);
+    __ ldr(r0, Address(r0, ArrayKlass::element_klass_offset()));
+    __ cmp(r0, r1);
+    __ br(Assembler::EQ, is_type_ok);
+
+    __ profile_typecheck_failed(r2);
+    __ b(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
+
+    __ bind(is_type_ok);
+
+    // DMS CHECK: Reload from TOS to be safe, because profile_typecheck above blows r2 and r0.
+    // Should we really do it?
+    __ ldr(r1, at_tos());    // value
+    __ mov(r2, r3);          // array; same as ldr(r2, at_tos_p2())
+    __ ldr(r3, at_tos_p1()); // index
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::value_array_store), r1, r2, r3);
+  }
+
   // Pop stack arguments
   __ bind(done);
@@ -2021,19 +2095,86 @@
   __ profile_not_taken_branch(r0);
 }
 
-void TemplateTable::if_acmp(Condition cc)
-{
+void TemplateTable::if_acmp(Condition cc) {
   transition(atos, vtos);
   // assume branch is more often taken than not (loops use backward branches)
-  Label not_taken;
+  Label taken, not_taken;
   __ pop_ptr(r1);
+
+  Register is_value_mask = rscratch1;
+  __ mov(is_value_mask, markOopDesc::always_locked_pattern);
+
+  if (EnableValhalla && ACmpOnValues == 3) {
+    __ cmp(r1, r0);
+    __ br(Assembler::EQ, (cc == equal) ? taken : not_taken);
+
+    // might be substitutable: test whether either r0 or r1 is null
+    __ andr(r2, r0, r1);
+    __ cbz(r2, (cc == equal) ? not_taken : taken);
+
+    // ... and are both values?
+    __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes()));
+    __ andr(r2, r2, is_value_mask);
+    __ ldr(r4, Address(r0, oopDesc::mark_offset_in_bytes()));
+    __ andr(r4, r4, is_value_mask);
+    __ andr(r2, r2, r4);
+    __ cmp(r2, is_value_mask);
+    __ br(Assembler::NE, (cc == equal) ? not_taken : taken);
+
+    // same value klass?
+    __ load_metadata(r2, r1);
+    __ load_metadata(r4, r0);
+    __ cmp(r2, r4);
+    __ br(Assembler::NE, (cc == equal) ? not_taken : taken);
+
+    // Both are the same value type; now test for substitutability...
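+    //
+    // Reviewer note, a minimal sketch (not the runtime's actual code) of the
+    // substitutability test that InterpreterRuntime::is_substitutable is
+    // expected to perform on two non-null values of the same klass:
+    //
+    //   bool is_substitutable(oop a, oop b) {  // a, b non-null, same klass
+    //     for each field f of a's klass:       // pseudo-iteration
+    //       - primitive field: compare bit-wise
+    //       - value field:     recurse on is_substitutable
+    //       - reference field: compare with acmp semantics
+    //     return true iff all fields matched
+    //   }
+    //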
+    if (cc == equal) {
+      invoke_is_substitutable(r0, r1, taken, not_taken);
+    } else {
+      invoke_is_substitutable(r0, r1, not_taken, taken);
+    }
+    __ stop("Not reachable");
+  }
+
+  if (EnableValhalla && ACmpOnValues == 1) {
+    // Mode 1: if r1 is a value object (mark word carries always_locked_pattern),
+    // set its low bit so the reference comparison below can never report it equal.
+    Label is_null;
+    __ cbz(r1, is_null);
+    __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes()));
+    __ andr(r2, r2, is_value_mask);
+    __ cmp(r2, is_value_mask);
+    __ cset(r2, Assembler::EQ);
+    __ orr(r1, r1, r2);
+    __ bind(is_null);
+  }
+
+  __ cmpoop(r1, r0);
+
+  if (EnableValhalla && ACmpOnValues == 2) {
+    // Mode 2: two equal references compare as not equal when they refer to a
+    // value object; inverting cc folds that into the final branch below.
+    __ br(Assembler::NE, (cc == not_equal) ? taken : not_taken);
+    __ cbz(r1, (cc == equal) ? taken : not_taken);
+    __ ldr(r2, Address(r1, oopDesc::mark_offset_in_bytes()));
+    __ andr(r2, r2, is_value_mask);
+    __ cmp(r2, is_value_mask);
+    cc = (cc == equal) ? not_equal : equal;
+  }
+
+  __ br(j_not(cc), not_taken);
+  __ bind(taken);
   branch(false, false);
   __ bind(not_taken);
   __ profile_not_taken_branch(r0);
 }
 
+void TemplateTable::invoke_is_substitutable(Register aobj, Register bobj,
+                                            Label& is_subst, Label& not_subst) {
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::is_substitutable), aobj, bobj);
+  // On return, r0 holds the answer; jump to the corresponding outcome
+  __ cbz(r0, not_subst);
+  __ b(is_subst);
+}
+
+
 void TemplateTable::ret() {
   transition(vtos, vtos);
   // We might be moving to a safepoint.  The thread which calls
@@ -2283,7 +2424,7 @@
     __ narrow(r0);
   }
 
-  __ remove_activation(state); 
+  __ remove_activation(state);
   __ ret(lr);
 }
 
@@ -2497,8 +2638,7 @@
   // x86 uses a shift and mask or wings it with a shift plus assert
   // the mask is not needed. aarch64 just uses bitfield extract
-  __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift,
-           ConstantPoolCacheEntry::tos_state_bits);
+  __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits);
 
   assert(btos == 0, "change code, btos != 0");
   __ cbnz(flags, notByte);
@@ -2533,12 +2673,68 @@
   __ cmp(flags, (u1)atos);
   __ br(Assembler::NE, notObj);
   // atos
-  do_oop_load(_masm, field, r0, IN_HEAP);
-  __ push(atos);
-  if (rc == may_rewrite) {
-    patch_bytecode(Bytecodes::_fast_agetfield, bc, r1);
+  if (!EnableValhalla) {
+    do_oop_load(_masm, field, r0, IN_HEAP);
+    __ push(atos);
+    if (rc == may_rewrite) {
+      patch_bytecode(Bytecodes::_fast_agetfield, bc, r1);
+    }
+    __ b(Done);
+  } else { // Valhalla
+
+    if (is_static) {
+      __ load_heap_oop(r0, field);
+      Label isFlattenable, isUninitialized;
+      // The code below must handle the case where the static field has not
+      // been initialized yet
+      __ test_field_is_flattenable(raw_flags, r8 /*temp*/, isFlattenable);
+      // Not flattenable case
+      __ push(atos);
+      __ b(Done);
+      // Flattenable case, must not return null even if uninitialized
+      __ bind(isFlattenable);
+      __ cbz(r0, isUninitialized);
+      __ push(atos);
+      __ b(Done);
+      __ bind(isUninitialized);
+      __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask);
+      __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_static_value_field), obj, raw_flags);
+      __ verify_oop(r0);
+      __ push(atos);
+      __ b(Done);
+    } else {
+      Label isFlattened, isInitialized, isFlattenable, rewriteFlattenable;
+      __ test_field_is_flattenable(raw_flags, r8 /*temp*/, isFlattenable);
+      // Non-flattenable field case, also covers the object case
+      __ load_heap_oop(r0, field);
+      __ push(atos);
+      if (rc == may_rewrite) {
+        patch_bytecode(Bytecodes::_fast_agetfield, bc, r1);
+      }
+      __ b(Done);
+      __ bind(isFlattenable);
+      __ test_field_is_flattened(raw_flags, r8 /* temp */, isFlattened);
+      // Non-flattened field case
+      __ load_heap_oop(r0, field);
+      __ cbnz(r0, isInitialized);
+      __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask);
+      __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field), obj, raw_flags);
+      __ bind(isInitialized);
+      __ verify_oop(r0);
+      __ push(atos);
+      __ b(rewriteFlattenable);
+      __ bind(isFlattened);
+      __ ldr(r10, Address(cache, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f1_offset())));
+      __ andw(raw_flags, raw_flags, ConstantPoolCacheEntry::field_index_mask);
+      call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flattened_field), obj, raw_flags, r10);
+      __ verify_oop(r0);
+      __ push(atos);
+      __ bind(rewriteFlattenable);
+      if (rc == may_rewrite) {
+        patch_bytecode(Bytecodes::_fast_qgetfield, bc, r1);
+      }
+      __ b(Done);
+    }
   }
-  __ b(Done);
 
   __ bind(notObj);
   __ cmp(flags, (u1)itos);
@@ -2708,6 +2904,7 @@
   const Register obj = r2;
   const Register off = r19;
   const Register flags = r0;
+  const Register flags2 = r6;
   const Register bc = r4;
 
   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
@@ -2730,6 +2927,8 @@
   Label notByte, notBool, notInt, notShort, notChar,
         notLong, notFloat, notObj, notDouble;
 
+  __ mov(flags2, flags);
+
   // x86 uses a shift and mask or wings it with a shift plus assert
   // the mask is not needed. aarch64 just uses bitfield extract
   __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits);
@@ -2772,14 +2971,56 @@
   // atos
   {
-    __ pop(atos);
-    if (!is_static) pop_and_check_object(obj);
-    // Store into the field
-    do_oop_store(_masm, field, r0, IN_HEAP);
-    if (rc == may_rewrite) {
-      patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no);
-    }
-    __ b(Done);
+    if (!EnableValhalla) {
+      __ pop(atos);
+      if (!is_static) pop_and_check_object(obj);
+      // Store into the field
+      do_oop_store(_masm, field, r0, IN_HEAP);
+      if (rc == may_rewrite) {
+        patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no);
+      }
+      __ b(Done);
+    } else { // Valhalla
+
+      __ pop(atos);
+      if (is_static) {
+        Label notFlattenable;
+        __ test_field_is_not_flattenable(flags2, r8 /* temp */, notFlattenable);
+        __ null_check(r0);
+        __ bind(notFlattenable);
+        do_oop_store(_masm, field, r0, IN_HEAP);
+        __ b(Done);
+      } else {
+        Label isFlattenable, isFlattened, rewriteNotFlattenable, rewriteFlattenable;
+        __ test_field_is_flattenable(flags2, r8 /*temp*/, isFlattenable);
+        // Not flattenable case; covers non-flattenable values and objects
+        pop_and_check_object(obj);
+        // Store into the field
+        do_oop_store(_masm, field, r0, IN_HEAP);
+        __ bind(rewriteNotFlattenable);
+        if (rc == may_rewrite) {
+          patch_bytecode(Bytecodes::_fast_aputfield, bc, r19, true, byte_no);
+        }
+        __ b(Done);
+        // Implementation of the flattenable semantics
+        __ bind(isFlattenable);
+        __ null_check(r0);
+        __ test_field_is_flattened(flags2, r8 /*temp*/, isFlattened);
+        // Not flattened case
+        pop_and_check_object(obj);
+        // Store into the field
+        do_oop_store(_masm, field, r0, IN_HEAP);
+        __ b(rewriteFlattenable);
+        __ bind(isFlattened);
+        pop_and_check_object(obj);
+        call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flattened_value), r0, off, obj);
+        __ bind(rewriteFlattenable);
+        if (rc == may_rewrite) {
+          patch_bytecode(Bytecodes::_fast_qputfield, bc, r19, true, byte_no);
+        }
+        __ b(Done);
+      }
+    } // Valhalla
   }
 
   __ bind(notObj);
@@ -2919,6 +3160,7 @@
   // to do it for every data type, we use the saved values as the
   // jvalue object.
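+  // Note: at this point the operand of _fast_qputfield is still a value oop
+  // (flattening happens later, in the store path), so for JVMTI
+  // field-modification reporting it is saved and restored exactly like the
+  // _fast_aputfield pointer below.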
   switch (bytecode()) {          // load values into the jvalue object
+  case Bytecodes::_fast_qputfield: // fall through
   case Bytecodes::_fast_aputfield: __ push_ptr(r0); break;
   case Bytecodes::_fast_bputfield: // fall through
   case Bytecodes::_fast_zputfield: // fall through
@@ -2945,6 +3187,7 @@
              r19, c_rarg2, c_rarg3);
 
   switch (bytecode()) {             // restore tos values
+  case Bytecodes::_fast_qputfield: // fall through
   case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break;
   case Bytecodes::_fast_bputfield: // fall through
   case Bytecodes::_fast_zputfield: // fall through
@@ -2995,6 +3238,19 @@
 
   // access field
   switch (bytecode()) {
+  case Bytecodes::_fast_qputfield: // fall through
+    {
+      Label isFlattened, done;
+      __ null_check(r0);
+      __ test_field_is_flattened(r3, r8 /* temp */, isFlattened);
+      // Non-flattened case
+      do_oop_store(_masm, field, r0, IN_HEAP);
+      __ b(done);
+      __ bind(isFlattened);
+      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::write_flattened_value), r0, r1, r2);
+      __ bind(done);
+    }
+    break;
   case Bytecodes::_fast_aputfield:
     do_oop_store(_masm, field, r0, IN_HEAP);
     break;
@@ -3088,6 +3344,32 @@
 
   // access field
   switch (bytecode()) {
+  case Bytecodes::_fast_qgetfield:
+    {
+      Label isFlattened, isInitialized, Done;
+      // DMS CHECK: We don't need to reload the flags multiple times, but we
+      // stay close to the original code
+      __ ldrw(r9, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())));
+      __ test_field_is_flattened(r9, r8 /* temp */, isFlattened);
+      // Non-flattened field case
+      __ mov(r9, r0);
+      __ load_heap_oop(r0, field);
+      __ cbnz(r0, isInitialized);
+      __ mov(r0, r9);
+      __ ldrw(r9, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())));
+      __ andw(r9, r9, ConstantPoolCacheEntry::field_index_mask);
+      __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field), r0, r9);
+      __ bind(isInitialized);
+      __ verify_oop(r0);
+      __ b(Done);
+      __ bind(isFlattened);
+      __ ldrw(r9, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())));
+      __ andw(r9, r9, ConstantPoolCacheEntry::field_index_mask);
+      __ ldr(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f1_offset())));
+      call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flattened_field), r0, r9, r3);
+      __ verify_oop(r0);
+      __ bind(Done);
+    }
+    break;
   case Bytecodes::_fast_agetfield:
     do_oop_load(_masm, field, r0, IN_HEAP);
     __ verify_oop(r0);
@@ -3644,6 +3926,30 @@
   __ membar(Assembler::StoreStore);
 }
 
+void TemplateTable::defaultvalue() {
+  transition(vtos, atos);
+  __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
+  __ get_constant_pool(c_rarg1);
+  call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::defaultvalue),
+          c_rarg1, c_rarg2);
+  __ verify_oop(r0);
+  // Must prevent reordering of stores for object initialization
+  // with stores that publish the new object.
+  __ membar(Assembler::StoreStore);
+}
+
+void TemplateTable::withfield() {
+  transition(vtos, atos);
+  resolve_cache_and_index(f2_byte, c_rarg1 /*cache*/, c_rarg2 /*index*/, sizeof(u2));
+
+  // n.b. unlike x86, the cache is now rcpool plus the indexed offset,
+  // so we pass rcpool to meet the shared code's expectations
+
+  call_VM(r1, CAST_FROM_FN_PTR(address, InterpreterRuntime::withfield), rcpool);
+  __ verify_oop(r1);
+  __ add(esp, esp, r0);
+  __ mov(r0, r1);
+}
+
 void TemplateTable::newarray() {
   transition(itos, atos);
   __ load_unsigned_byte(c_rarg1, at_bcp(1));
@@ -3715,14 +4021,29 @@
   __ bind(ok_is_subtype);
   __ mov(r0, r3); // Restore object in r3
+  __ b(done);
+
+  __ bind(is_null);
 
   // Collect counts on whether this test sees NULLs a lot or not.
   if (ProfileInterpreter) {
-    __ b(done);
-    __ bind(is_null);
     __ profile_null_seen(r2);
-  } else {
-    __ bind(is_null); // same as 'done'
   }
+
+  if (EnableValhalla) {
+    // Get cpool & tags index
+    __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array
+    __ get_unsigned_2_byte_index_at_bcp(r19, 1); // r19=index
+    // See if bytecode has already been quicked
+    __ add(rscratch1, r3, Array<u1>::base_offset_in_bytes());
+    __ lea(r1, Address(rscratch1, r19));
+    __ ldarb(r1, r1);
+    // See if CP entry is a Q-descriptor
+    __ andr(r1, r1, JVM_CONSTANT_QDescBit);
+    __ cmp(r1, (u1)JVM_CONSTANT_QDescBit);
+    __ br(Assembler::NE, done);
+    __ b(ExternalAddress(Interpreter::_throw_NullPointerException_entry));
+  }
+
   __ bind(done);
 }
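
Reviewer note on the checkcast hunk above: with EnableValhalla, a null being
cast is only allowed through when the constant pool entry is not a
Q-descriptor; casting null to an inline (value) type throws
NullPointerException. A minimal standalone C++ sketch of that rule, using
simplified types and a hypothetical tag-bit value rather than HotSpot's actual
API:

    #include <cstdint>
    #include <stdexcept>

    // Hypothetical stand-in for the JVM_CONSTANT_QDescBit flag in the tags array.
    constexpr uint8_t kQDescBit = 0x80;

    // Mirrors the hunk: andr(r1, tag, QDescBit); cmp; br(NE, done); else throw NPE.
    void checkcast_null(uint8_t cp_tag) {
      if ((cp_tag & kQDescBit) == kQDescBit) {
        // Q-descriptor: inline types are null-free, so the cast must fail.
        throw std::runtime_error("NullPointerException");
      }
      // Plain L-descriptor: null passes checkcast unchanged (branch to done).
    }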