# HG changeset patch
# User zyao
# Date 1513928924 -28800
#      Fri Dec 22 15:48:44 2017 +0800
# Node ID 1494b12d0a7d858637c0c0aebd8ae3dd776562c5
# Parent  18fb0362469636edea3e949eb668eca3dc4a5aa9
[RFC] MachSpillCopy peephole

Enable OptoPeephole by default on AArch64.
Add a manually defined peephole() method for the MachSpillCopy node.

diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -3282,7 +3282,14 @@
         }
       } else {                    // gpr --> stack spill
         assert(dst_lo_rc == rc_stack, "spill to bad register class");
-        __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
+        if (_spill_type == Pair) {
+          __ spill(as_Register(Matcher::_regEncode[src_lo]),
+                   as_Register(Matcher::_regEncode[pair_hi_reg]),
+                   is64,
+                   dst_offset);
+        } else {
+          __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
+        }
       }
       break;
     case rc_float:
@@ -3310,7 +3317,15 @@
       break;
     case rc_stack:
       if (dst_lo_rc == rc_int) {  // stack --> gpr load
-        __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
+        if (_spill_type == Pair) {
+          assert(pair_hi_reg != OptoReg::Bad, "bad register");
+          __ unspill(as_Register(Matcher::_regEncode[dst_lo]),
+                     as_Register(Matcher::_regEncode[pair_hi_reg]),
+                     is64,
+                     src_offset);
+        } else {
+          __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
+        }
       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                    is64 ? __ D : __ S, src_offset);
@@ -3366,6 +3381,85 @@
   return MachNode::size(ra_);
 }
 
+MachNode *MachSpillCopyNode::peephole(Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted) {
+  MachSpillCopyNode *inst0 = this;
+  MachSpillCopyNode *inst1 = NULL;
+  if ( block_index - 1 > 0 ) {
+    Node *n = block->get_node(block_index - 1);
+    inst1 = (n->is_MachSpillCopy()) ? n->as_MachSpillCopy() : NULL;
+  }
+  if (inst1 == NULL) {
+    return NULL;
+  }
+  if (bottom_type()->isa_vect() != NULL ||
+      inst1->bottom_type()->isa_vect() != NULL ||
+      _spill_type == Pair ||
+      inst1->_spill_type == Pair) {
+    return NULL;
+  }
+  //
+  OptoReg::Name src_lo = ra_->get_reg_first(in(1));
+  OptoReg::Name dst_lo = ra_->get_reg_first(this);
+  enum RC src_lo_rc = rc_class(src_lo);
+  enum RC dst_lo_rc = rc_class(dst_lo);
+  OptoReg::Name inst1_src_lo = ra_->get_reg_first(inst1->in(1));
+  OptoReg::Name inst1_dst_lo = ra_->get_reg_first(inst1);
+  enum RC inst1_src_lo_rc = rc_class(inst1_src_lo);
+  enum RC inst1_dst_lo_rc = rc_class(inst1_dst_lo);
+  if (((src_lo_rc == rc_stack && dst_lo_rc == rc_int)
+       || (dst_lo_rc == rc_stack && src_lo_rc == rc_int))
+      && ((inst1_src_lo_rc == rc_stack && inst1_dst_lo_rc == rc_int)
+          || (inst1_dst_lo_rc == rc_stack && inst1_src_lo_rc == rc_int))) {
+    OptoReg::Name src_hi = ra_->get_reg_second(in(1));
+    OptoReg::Name dst_hi = ra_->get_reg_second(this);
+    enum RC dst_hi_rc = rc_class(dst_hi);
+    enum RC src_hi_rc = rc_class(src_hi);
+    OptoReg::Name inst1_src_hi = ra_->get_reg_second(inst1->in(1));
+    OptoReg::Name inst1_dst_hi = ra_->get_reg_second(inst1);
+    bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
+                (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
+    bool inst1_is64 = (inst1_src_lo & 1) == 0 && inst1_src_lo + 1 == inst1_src_hi &&
+                      (inst1_dst_lo & 1) == 0 && inst1_dst_lo + 1 == inst1_dst_hi;
+    if (is64 ^ inst1_is64) {
+      return NULL;
+    }
+    int offset = 0;
+    OptoReg::Name other_reg = OptoReg::Bad;
+    MachSpillCopyNode* pair = NULL;
+    int src_offset = ra_->reg2offset(src_lo);
+    int dst_offset = ra_->reg2offset(dst_lo);
+    int inst1_src_offset = ra_->reg2offset(inst1_src_lo);
+    int inst1_dst_offset = ra_->reg2offset(inst1_dst_lo);
+    if (dst_lo_rc == rc_stack) {
+      assert(dst_offset >= 0, "invalid offset");
+      offset = dst_offset - inst1_dst_offset;
+    } else if (src_lo_rc == rc_stack) {
+      assert(src_offset >= 0, "invalid offset");
+      offset = src_offset - inst1_src_offset;
+    }
+    // TODO alignment check for some CPU.
+    if ((is64 && abs(offset) == 8) || (!is64 && abs(offset) == 4)) { // TODO: 8/4 magic number
+      if (offset < 0) { // (this, inst1)
+        pair = this;
+        pair->pair_hi_reg = (src_lo_rc == rc_stack) ? inst1_dst_lo : inst1_src_lo;
+        if (Verbose) {
+          tty->print_cr("DEBUG: valid replacement found: %d && %d", pair->_idx, inst1->_idx);
+        }
+      } else { // (inst1, this)
+        pair = inst1;
+        pair->pair_hi_reg = (src_lo_rc == rc_stack) ? dst_lo : src_lo;
+        if (Verbose) {
+          tty->print_cr("DEBUG: valid replacement found: %d && %d", pair->_idx, this->_idx);
+        }
+      }
+      pair->_spill_type = Pair;
+      deleted = 2;
+      return pair;
+    }
+  }
+  return NULL;
+}
+
 //=============================================================================
 
 #ifndef PRODUCT
@@ -4438,8 +4532,8 @@
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
                Assembler::byte, /*acquire*/ false, /*release*/ true,
                /*weak*/ false, noreg);
-  %}
-
+  %}
+
   // The only difference between aarch64_enc_cmpxchg and
   // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
diff --git a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
--- a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
@@ -64,7 +64,7 @@
 
 // Peephole and CISC spilling both break the graph, and so makes the
 // scheduler sick.
-define_pd_global(bool, OptoPeephole, false);
+define_pd_global(bool, OptoPeephole, true);
 define_pd_global(bool, UseCISCSpill, true);
 define_pd_global(bool, OptoScheduling, false);
 define_pd_global(bool, OptoBundling, false);
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -1298,6 +1298,23 @@
       strw(Rx, spill_address(4, offset));
     }
   }
+  void spill(Register Rx, Register Ry, bool is64, int offset) {
+    if (is64) {
+      if (offset <= 504) {
+        stp(Rx, Ry, spill_address(8, offset));
+      } else {
+        str(Rx, spill_address(8, offset));
+        str(Ry, spill_address(8, offset+8));
+      }
+    } else {
+      if (offset <= 252) {
+        stpw(Rx, Ry, spill_address(4, offset));
+      } else {
+        strw(Rx, spill_address(4, offset));
+        strw(Ry, spill_address(4, offset+4));
+      }
+    }
+  }
   void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
     str(Vx, T, spill_address(1 << (int)T, offset));
   }
@@ -1308,6 +1325,23 @@
       ldrw(Rx, spill_address(4, offset));
     }
   }
+  void unspill(Register Rx, Register Ry, bool is64, int offset) {
+    if (is64) {
+      if (offset <= 504) {
+        ldp(Rx, Ry, spill_address(8, offset));
+      } else {
+        ldr(Rx, spill_address(8, offset));
+        ldr(Ry, spill_address(8, offset+8));
+      }
+    } else {
+      if (offset <= 252) {
+        ldpw(Rx, Ry, spill_address(4, offset));
+      } else {
+        ldrw(Rx, spill_address(4, offset));
+        ldrw(Ry, spill_address(4, offset+4));
+      }
+    }
+  }
   void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
     ldr(Vx, T, spill_address(1 << (int)T, offset));
   }
diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp
--- a/src/hotspot/share/opto/compile.cpp
+++ b/src/hotspot/share/opto/compile.cpp
@@ -2426,13 +2426,14 @@
     cfg.fixup_flow();
   }
 
+#if 0
   // Apply peephole optimizations
   if( OptoPeephole ) {
    TracePhase tp("peephole", &timers[_t_peephole]);
    PhasePeephole peep( _regalloc, cfg);
    peep.do_transform();
  }
-
+#endif
   // Do late expand if CPU requires this.
   if (Matcher::require_postalloc_expand) {
     TracePhase tp("postalloc_expand", &timers[_t_postalloc_expand]);
diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp
--- a/src/hotspot/share/opto/machnode.hpp
+++ b/src/hotspot/share/opto/machnode.hpp
@@ -560,13 +560,15 @@
     InputToRematerialization, // When rematerializing a node we stretch the inputs live ranges, and they might be
                               // stretched beyond a new definition point, therefore we split out new copies instead
     CallUse,                  // Spill use at a call
-    Bound                     // An lrg marked as spill that is bound and needs to be spilled at a use
+    Bound,                    // An lrg marked as spill that is bound and needs to be spilled at a use
+    Pair                      // Spill two in a pair
   };
 private:
   const RegMask *_in;           // RegMask for input
   const RegMask *_out;          // RegMask for output
   const Type *_type;
-  const SpillType _spill_type;
+  SpillType _spill_type;
+  OptoReg::Name pair_hi_reg;    // Ugly prototype
 public:
   MachSpillCopyNode(SpillType spill_type, Node *n, const RegMask &in, const RegMask &out ) :
     MachIdealNode(), _spill_type(spill_type), _in(&in), _out(&out), _type(n->bottom_type()) {
@@ -574,6 +576,7 @@
     init_flags(Flag_is_Copy);
     add_req(NULL);
     add_req(n);
+    pair_hi_reg = OptoReg::Bad;
   }
   virtual uint size_of() const { return sizeof(*this); }
   void set_out_RegMask(const RegMask &out) { _out = &out; }
@@ -587,7 +590,9 @@
 
   virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
   virtual uint size(PhaseRegAlloc *ra_) const;
-
+#ifdef AARCH64
+  virtual MachNode *peephole(Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted);
+#endif
 
 #ifndef PRODUCT
   static const char *spill_type(SpillType st) {
@@ -618,6 +623,8 @@
         return "CallUseSpillCopy";
       case Bound:
         return "BoundSpillCopy";
+      case Pair:
+        return "Pair";
       default:
         assert(false, "Must have valid spill type");
         return "MachSpillCopy";
diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp
--- a/src/hotspot/share/opto/output.cpp
+++ b/src/hotspot/share/opto/output.cpp
@@ -151,6 +151,13 @@
     return;
   }
 
+  // Apply peephole optimizations
+  if( OptoPeephole ) {
+    TracePhase tp("peephole", &timers[_t_peephole]);
+    PhasePeephole peep( _regalloc, *_cfg);
+    peep.do_transform();
+  }
+
   fill_buffer(cb, blk_starts);
 }
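
Illustration (sketch only, not part of the patch; the register numbers and
stack offsets below are invented for the example, not taken from real
compiler output). Today two adjacent 64-bit gpr reloads from neighbouring
stack slots are emitted as two separate loads:

    ldr  x10, [sp, #16]
    ldr  x11, [sp, #24]

With this peephole the two MachSpillCopy nodes are merged into a single Pair
spill copy, and the new unspill(Register, Register, bool, int) helper emits
one pair load instead:

    ldp  x10, x11, [sp, #16]

The same merge applies to stores (str/str -> stp) and, for 32-bit values, to
strw/ldrw -> stpw/ldpw. When the paired offset is outside the range checked
in spill()/unspill() (> 504 for 8-byte slots, > 252 for 4-byte slots), the
helpers fall back to two single loads/stores.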