< prev index next >
src/hotspot/share/opto/output.cpp
Print this page
@@ -29,10 +29,12 @@
#include "code/debugInfo.hpp"
#include "code/debugInfoRec.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/compilerDirectives.hpp"
#include "compiler/oopMap.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/c2/barrierSetC2.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/ad.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/locknode.hpp"
@@ -112,39 +114,37 @@
_cfg->map_node_to_block(epilog, block);
}
}
}
+ // Keeper of sizing aspects
+ BufferSizingData buf_sizes = BufferSizingData();
+
+ // Initialize code buffer
+ init_buffer1(buf_sizes._const);
+ if (failing()) return;
+
+ // Pre-compute the length of blocks and replace
+ // long branches with short if machine supports it.
+ // Must be done before ScheduleAndBundle due to SPARC delay slots
uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1);
blk_starts[0] = 0;
+ shorten_branches(blk_starts, buf_sizes);
- // Initialize code buffer and process short branches.
- CodeBuffer* cb = init_buffer(blk_starts);
-
- if (cb == NULL || failing()) {
+ ScheduleAndBundle();
+ if (failing()) {
return;
}
- ScheduleAndBundle();
+ // Late barrier analysis must be done after schedule and bundle
+ // Otherwise liveness based spilling will fail
+ BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+ bs->late_barrier_analysis();
-#ifndef PRODUCT
- if (trace_opto_output()) {
- tty->print("\n---- After ScheduleAndBundle ----\n");
- for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
- tty->print("\nBB#%03d:\n", i);
- Block* block = _cfg->get_block(i);
- for (uint j = 0; j < block->number_of_nodes(); j++) {
- Node* n = block->get_node(j);
- OptoReg::Name reg = _regalloc->get_reg_first(n);
- tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
- n->dump();
- }
- }
- }
-#endif
-
- if (failing()) {
+ // Complete sizing of codebuffer
+ CodeBuffer* cb = init_buffer2(buf_sizes);
+ if (cb == NULL || failing()) {
return;
}
BuildOopMaps();
@@ -221,11 +221,11 @@
} // if( MaxLoopPad < OptoLoopAlignment-1 )
}
// The architecture description provides short branch variants for some long
// branch instructions. Replace eligible long branches with short branches.
-void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
+void Compile::shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes) {
// Compute size of each block, method size, and relocation information size
uint nblocks = _cfg->number_of_blocks();
uint* jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
uint* jmp_size = NEW_RESOURCE_ARRAY(uint,nblocks);
@@ -239,15 +239,15 @@
DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,nblocks); )
bool has_short_branch_candidate = false;
// Initialize the sizes to 0
- code_size = 0; // Size in bytes of generated code
- stub_size = 0; // Size in bytes of all stub entries
+ int code_size = 0; // Size in bytes of generated code
+ int stub_size = 0; // Size in bytes of all stub entries
// Size in bytes of all relocation entries, including those in local stubs.
// Start with 2-bytes of reloc info for the unvalidated entry point
- reloc_size = 1; // Number of relocation entries
+ int reloc_size = 1; // Number of relocation entries
// Make three passes. The first computes pessimistic blk_starts,
// relative jmp_offset and reloc_size information. The second performs
// short branch substitution using the pessimistic sizing. The
// third inserts nops where needed.
@@ -477,10 +477,14 @@
// Adjust reloc_size to number of record of relocation info
// Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
// a relocation index.
// The CodeBuffer will expand the locs array if this estimate is too low.
reloc_size *= 10 / sizeof(relocInfo);
+
+ buf_sizes._reloc = reloc_size;
+ buf_sizes._code = code_size;
+ buf_sizes._stub = stub_size;
}
//------------------------------FillLocArray-----------------------------------
// Create a bit of debug info and append it to the array. The mapping is from
// Java local or expression stack to constant, register or stack-slot. For
@@ -488,12 +492,12 @@
// entry has been taken care of and caller should skip it).
static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
// This should never have accepted Bad before
assert(OptoReg::is_valid(regnum), "location must be valid");
return (OptoReg::is_reg(regnum))
- ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
- : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
+ ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
+ : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
}
ObjectValue*
Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) {
@@ -587,11 +591,11 @@
// jsr/ret return address which must be restored into a the full
// width 64-bit stack slot.
array->append(new_loc_value( _regalloc, regnum, Location::lng ));
}
#else //_LP64
-#ifdef SPARC
+ #ifdef SPARC
if (t->base() == Type::Long && OptoReg::is_reg(regnum)) {
// For SPARC we have to swap high and low words for
// long values stored in a single-register (g0-g7).
array->append(new_loc_value( _regalloc, regnum , Location::normal ));
array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
@@ -608,68 +612,68 @@
array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
array->append(new_loc_value( _regalloc, regnum , Location::normal ));
}
#endif //_LP64
else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
- OptoReg::is_reg(regnum) ) {
+ OptoReg::is_reg(regnum) ) {
array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double()
- ? Location::float_in_dbl : Location::normal ));
+ ? Location::float_in_dbl : Location::normal ));
} else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
- ? Location::int_in_long : Location::normal ));
+ ? Location::int_in_long : Location::normal ));
} else if( t->base() == Type::NarrowOop ) {
array->append(new_loc_value( _regalloc, regnum, Location::narrowoop ));
} else {
array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal ));
}
return;
}
// No register. It must be constant data.
switch (t->base()) {
- case Type::Half: // Second half of a double
- ShouldNotReachHere(); // Caller should skip 2nd halves
- break;
- case Type::AnyPtr:
- array->append(new ConstantOopWriteValue(NULL));
- break;
- case Type::AryPtr:
- case Type::InstPtr: // fall through
- array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
- break;
- case Type::NarrowOop:
- if (t == TypeNarrowOop::NULL_PTR) {
+ case Type::Half: // Second half of a double
+ ShouldNotReachHere(); // Caller should skip 2nd halves
+ break;
+ case Type::AnyPtr:
array->append(new ConstantOopWriteValue(NULL));
- } else {
- array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
- }
- break;
- case Type::Int:
- array->append(new ConstantIntValue(t->is_int()->get_con()));
- break;
- case Type::RawPtr:
- // A return address (T_ADDRESS).
- assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
+ break;
+ case Type::AryPtr:
+ case Type::InstPtr: // fall through
+ array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
+ break;
+ case Type::NarrowOop:
+ if (t == TypeNarrowOop::NULL_PTR) {
+ array->append(new ConstantOopWriteValue(NULL));
+ } else {
+ array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
+ }
+ break;
+ case Type::Int:
+ array->append(new ConstantIntValue(t->is_int()->get_con()));
+ break;
+ case Type::RawPtr:
+ // A return address (T_ADDRESS).
+ assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
#ifdef _LP64
- // Must be restored to the full-width 64-bit stack slot.
- array->append(new ConstantLongValue(t->is_ptr()->get_con()));
+ // Must be restored to the full-width 64-bit stack slot.
+ array->append(new ConstantLongValue(t->is_ptr()->get_con()));
#else
- array->append(new ConstantIntValue(t->is_ptr()->get_con()));
+ array->append(new ConstantIntValue(t->is_ptr()->get_con()));
#endif
- break;
- case Type::FloatCon: {
- float f = t->is_float_constant()->getf();
- array->append(new ConstantIntValue(jint_cast(f)));
- break;
- }
- case Type::DoubleCon: {
- jdouble d = t->is_double_constant()->getd();
+ break;
+ case Type::FloatCon: {
+ float f = t->is_float_constant()->getf();
+ array->append(new ConstantIntValue(jint_cast(f)));
+ break;
+ }
+ case Type::DoubleCon: {
+ jdouble d = t->is_double_constant()->getd();
#ifdef _LP64
- array->append(new ConstantIntValue((jint)0));
- array->append(new ConstantDoubleValue(d));
+ array->append(new ConstantIntValue((jint)0));
+ array->append(new ConstantDoubleValue(d));
#else
- // Repack the double as two jints.
+ // Repack the double as two jints.
// The convention the interpreter uses is that the second local
// holds the first raw word of the native double representation.
// This is actually reasonable, since locals and stack arrays
// grow downwards in all implementations.
// (If, on some machine, the interpreter's Java locals or stack
@@ -677,19 +681,19 @@
jlong_accessor acc;
acc.long_value = jlong_cast(d);
array->append(new ConstantIntValue(acc.words[1]));
array->append(new ConstantIntValue(acc.words[0]));
#endif
- break;
- }
- case Type::Long: {
- jlong d = t->is_long()->get_con();
+ break;
+ }
+ case Type::Long: {
+ jlong d = t->is_long()->get_con();
#ifdef _LP64
- array->append(new ConstantIntValue((jint)0));
- array->append(new ConstantLongValue(d));
+ array->append(new ConstantIntValue((jint)0));
+ array->append(new ConstantLongValue(d));
#else
- // Repack the long as two jints.
+ // Repack the long as two jints.
// The convention the interpreter uses is that the second local
// holds the first raw word of the native double representation.
// This is actually reasonable, since locals and stack arrays
// grow downwards in all implementations.
// (If, on some machine, the interpreter's Java locals or stack
@@ -697,18 +701,18 @@
jlong_accessor acc;
acc.long_value = d;
array->append(new ConstantIntValue(acc.words[1]));
array->append(new ConstantIntValue(acc.words[0]));
#endif
- break;
- }
- case Type::Top: // Add an illegal value here
- array->append(new LocationValue(Location()));
- break;
- default:
- ShouldNotReachHere();
- break;
+ break;
+ }
+ case Type::Top: // Add an illegal value here
+ array->append(new LocationValue(Location()));
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
}
}
// Determine if this node starts a bundle
bool Compile::starts_bundle(const Node *n) const {
@@ -869,62 +873,62 @@
// A simplified version of Process_OopMap_Node, to handle non-safepoints.
class NonSafepointEmitter {
- Compile* C;
- JVMState* _pending_jvms;
- int _pending_offset;
-
- void emit_non_safepoint();
-
- public:
- NonSafepointEmitter(Compile* compile) {
- this->C = compile;
- _pending_jvms = NULL;
- _pending_offset = 0;
- }
-
- void observe_instruction(Node* n, int pc_offset) {
- if (!C->debug_info()->recording_non_safepoints()) return;
-
- Node_Notes* nn = C->node_notes_at(n->_idx);
- if (nn == NULL || nn->jvms() == NULL) return;
- if (_pending_jvms != NULL &&
- _pending_jvms->same_calls_as(nn->jvms())) {
- // Repeated JVMS? Stretch it up here.
- _pending_offset = pc_offset;
- } else {
- if (_pending_jvms != NULL &&
- _pending_offset < pc_offset) {
- emit_non_safepoint();
- }
+ Compile* C;
+ JVMState* _pending_jvms;
+ int _pending_offset;
+
+ void emit_non_safepoint();
+
+public:
+ NonSafepointEmitter(Compile* compile) {
+ this->C = compile;
_pending_jvms = NULL;
- if (pc_offset > C->debug_info()->last_pc_offset()) {
- // This is the only way _pending_jvms can become non-NULL:
- _pending_jvms = nn->jvms();
+ _pending_offset = 0;
+ }
+
+ void observe_instruction(Node* n, int pc_offset) {
+ if (!C->debug_info()->recording_non_safepoints()) return;
+
+ Node_Notes* nn = C->node_notes_at(n->_idx);
+ if (nn == NULL || nn->jvms() == NULL) return;
+ if (_pending_jvms != NULL &&
+ _pending_jvms->same_calls_as(nn->jvms())) {
+ // Repeated JVMS? Stretch it up here.
_pending_offset = pc_offset;
+ } else {
+ if (_pending_jvms != NULL &&
+ _pending_offset < pc_offset) {
+ emit_non_safepoint();
+ }
+ _pending_jvms = NULL;
+ if (pc_offset > C->debug_info()->last_pc_offset()) {
+ // This is the only way _pending_jvms can become non-NULL:
+ _pending_jvms = nn->jvms();
+ _pending_offset = pc_offset;
+ }
}
}
- }
- // Stay out of the way of real safepoints:
- void observe_safepoint(JVMState* jvms, int pc_offset) {
- if (_pending_jvms != NULL &&
- !_pending_jvms->same_calls_as(jvms) &&
- _pending_offset < pc_offset) {
- emit_non_safepoint();
+ // Stay out of the way of real safepoints:
+ void observe_safepoint(JVMState* jvms, int pc_offset) {
+ if (_pending_jvms != NULL &&
+ !_pending_jvms->same_calls_as(jvms) &&
+ _pending_offset < pc_offset) {
+ emit_non_safepoint();
+ }
+ _pending_jvms = NULL;
}
- _pending_jvms = NULL;
- }
- void flush_at_end() {
- if (_pending_jvms != NULL) {
- emit_non_safepoint();
+ void flush_at_end() {
+ if (_pending_jvms != NULL) {
+ emit_non_safepoint();
+ }
+ _pending_jvms = NULL;
}
- _pending_jvms = NULL;
- }
};
void NonSafepointEmitter::emit_non_safepoint() {
JVMState* youngest_jvms = _pending_jvms;
int pc_offset = _pending_offset;
@@ -950,19 +954,15 @@
// Mark the end of the scope set.
debug_info->end_non_safepoint(pc_offset);
}
//------------------------------init_buffer------------------------------------
-CodeBuffer* Compile::init_buffer(uint* blk_starts) {
+void Compile::init_buffer1(int& const_req) {
// Set the initially allocated size
- int code_req = initial_code_capacity;
- int locs_req = initial_locs_capacity;
- int stub_req = initial_stub_capacity;
- int const_req = initial_const_capacity;
+ const_req = initial_const_capacity;
- int pad_req = NativeCall::instruction_size;
// The extra spacing after the code is necessary on some platforms.
// Sometimes we need to patch in a jump after the last instruction,
// if the nmethod has been deoptimized. (See 4932387, 4894843.)
// Compute the byte offset where we can store the deopt pc.
@@ -970,11 +970,11 @@
_orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot));
}
// Compute prolog code size
_method_size = 0;
- _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
+ _frame_slots = OptoReg::reg2stack(_matcher->_old_SP) + _regalloc->_framesize;
#if defined(IA64) && !defined(AIX)
if (save_argument_registers()) {
// 4815101: this is a stub with implicit and unknown precision fp args.
// The usual spill mechanism can only generate stfd's in this case, which
// doesn't work if the fp reg to spill contains a single-precision denorm.
@@ -993,19 +993,19 @@
if (has_mach_constant_base_node()) {
uint add_size = 0;
// Fill the constant table.
// Note: This must happen before shorten_branches.
for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
- Block* b = _cfg->get_block(i);
+ Block *b = _cfg->get_block(i);
for (uint j = 0; j < b->number_of_nodes(); j++) {
- Node* n = b->get_node(j);
+ Node *n = b->get_node(j);
// If the node is a MachConstantNode evaluate the constant
// value section.
if (n->is_MachConstant()) {
- MachConstantNode* machcon = n->as_MachConstant();
+ MachConstantNode *machcon = n->as_MachConstant();
machcon->eval_constant(C);
} else if (n->is_Mach()) {
// On Power there are more nodes that issue constants.
add_size += (n->as_Mach()->ins_num_consts() * 8);
}
@@ -1019,15 +1019,22 @@
}
// Initialize the space for the BufferBlob used to find and verify
// instruction size in MachNode::emit_size()
init_scratch_buffer_blob(const_req);
- if (failing()) return NULL; // Out of memory
+}
- // Pre-compute the length of blocks and replace
- // long branches with short if machine supports it.
- shorten_branches(blk_starts, code_req, locs_req, stub_req);
+CodeBuffer* Compile::init_buffer2(BufferSizingData& buf_sizes) {
+
+ int stub_req = buf_sizes._stub;
+ int code_req = buf_sizes._code;
+ int const_req = buf_sizes._const;
+
+ int pad_req = NativeCall::instruction_size;
+
+ BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+ stub_req += bs->estimate_stub_size();
// nmethod and CodeBuffer count stubs & constants as part of method's code.
// class HandlerImpl is platform-specific and defined in the *.ad files.
int exception_handler_req = HandlerImpl::size_exception_handler() + MAX_stubs_size; // add marginal slop for handler
int deopt_handler_req = HandlerImpl::size_deopt_handler() + MAX_stubs_size; // add marginal slop for handler
@@ -1036,22 +1043,22 @@
if (StressCodeBuffers)
code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion
int total_req =
- const_req +
- code_req +
- pad_req +
- stub_req +
- exception_handler_req +
- deopt_handler_req; // deopt handler
+ const_req +
+ code_req +
+ pad_req +
+ stub_req +
+ exception_handler_req +
+ deopt_handler_req; // deopt handler
if (has_method_handle_invokes())
total_req += deopt_handler_req; // deopt MH handler
CodeBuffer* cb = code_buffer();
- cb->initialize(total_req, locs_req);
+ cb->initialize(total_req, buf_sizes._reloc);
// Have we run out of code space?
if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
C->record_failure("CodeCache is full");
return NULL;
@@ -1266,28 +1273,28 @@
non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
current_offset);
Process_OopMap_Node(mach, current_offset);
} // End if safepoint
- // If this is a null check, then add the start of the previous instruction to the list
+ // If this is a null check, then add the start of the previous instruction to the list
else if( mach->is_MachNullCheck() ) {
inct_starts[inct_cnt++] = previous_offset;
}
- // If this is a branch, then fill in the label with the target BB's label
+ // If this is a branch, then fill in the label with the target BB's label
else if (mach->is_MachBranch()) {
// This requires the TRUE branch target be in succs[0]
uint block_num = block->non_connector_successor(0)->_pre_order;
// Try to replace long branch if delay slot is not used,
// it is mostly for back branches since forward branch's
// distance is not updated yet.
bool delay_slot_is_used = valid_bundle_info(n) &&
node_bundling(n)->use_unconditional_delay();
if (!delay_slot_is_used && mach->may_be_short_branch()) {
- assert(delay_slot == NULL, "not expecting delay slot node");
- int br_size = n->size(_regalloc);
+ assert(delay_slot == NULL, "not expecting delay slot node");
+ int br_size = n->size(_regalloc);
int offset = blk_starts[block_num] - current_offset;
if (block_num >= i) {
// Current and following block's offset are not
// finalized yet, adjust distance by the difference
// between calculated and final offsets of current block.
@@ -1341,11 +1348,11 @@
}
}
}
}
#ifdef ASSERT
- // Check that oop-store precedes the card-mark
+ // Check that oop-store precedes the card-mark
else if (mach->ideal_Opcode() == Op_StoreCM) {
uint storeCM_idx = j;
int count = 0;
for (uint prec = mach->req(); prec < mach->len(); prec++) {
Node *oop_store = mach->in(prec); // Precedence edge
@@ -1512,10 +1519,14 @@
}
}
}
#endif
+ BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+ bs->emit_stubs(*cb);
+ if (failing()) return;
+
#ifndef PRODUCT
// Information on the size of the method, without the extraneous code
Scheduling::increment_method_size(cb->insts_size());
#endif
@@ -1686,24 +1697,24 @@
#endif
// Initializer for class Scheduling
Scheduling::Scheduling(Arena *arena, Compile &compile)
- : _arena(arena),
- _cfg(compile.cfg()),
- _regalloc(compile.regalloc()),
- _scheduled(arena),
- _available(arena),
- _reg_node(arena),
- _pinch_free_list(arena),
- _next_node(NULL),
- _bundle_instr_count(0),
- _bundle_cycle_number(0),
- _bundle_use(0, 0, resource_count, &_bundle_use_elements[0])
+ : _arena(arena),
+ _cfg(compile.cfg()),
+ _regalloc(compile.regalloc()),
+ _scheduled(arena),
+ _available(arena),
+ _reg_node(arena),
+ _pinch_free_list(arena),
+ _next_node(NULL),
+ _bundle_instr_count(0),
+ _bundle_cycle_number(0),
+ _bundle_use(0, 0, resource_count, &_bundle_use_elements[0])
#ifndef PRODUCT
- , _branches(0)
- , _unconditional_delays(0)
+ , _branches(0)
+ , _unconditional_delays(0)
#endif
{
// Create a MachNopNode
_nop = new MachNopNode();
@@ -1780,12 +1791,12 @@
// Clear the bundling information
_bundle_instr_count = 0;
_bundle_use.reset();
memcpy(_bundle_use_elements,
- Pipeline_Use::elaborated_elements,
- sizeof(Pipeline_Use::elaborated_elements));
+ Pipeline_Use::elaborated_elements,
+ sizeof(Pipeline_Use::elaborated_elements));
}
// Perform instruction scheduling and bundling over the sequence of
// instructions in backwards order.
void Compile::ScheduleAndBundle() {
@@ -1808,10 +1819,26 @@
Scheduling scheduling(Thread::current()->resource_area(), *this);
// Walk backwards over each basic block, computing the needed alignment
// Walk over all the basic blocks
scheduling.DoScheduling();
+
+#ifndef PRODUCT
+ if (trace_opto_output()) {
+ tty->print("\n---- After ScheduleAndBundle ----\n");
+ for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+ tty->print("\nBB#%03d:\n", i);
+ Block* block = _cfg->get_block(i);
+ for (uint j = 0; j < block->number_of_nodes(); j++) {
+ Node* n = block->get_node(j);
+ OptoReg::Name reg = _regalloc->get_reg_first(n);
+ tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
+ n->dump();
+ }
+ }
+ }
+#endif
}
// Compute the latency of all the instructions. This is fairly simple,
// because we already have a legal ordering. Walk over the instructions
// from first to last, and compute the latency of the instruction based
@@ -1876,11 +1903,11 @@
// If the node cannot be scheduled this cycle, skip it
if (_current_latency[n_idx] > _bundle_cycle_number) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n",
- n->_idx, _current_latency[n_idx], _bundle_cycle_number);
+ n->_idx, _current_latency[n_idx], _bundle_cycle_number);
#endif
return (false);
}
const Pipeline *node_pipeline = n->pipeline();
@@ -1893,11 +1920,11 @@
if (_bundle_instr_count + instruction_count > Pipeline::_max_instrs_per_cycle) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n",
- n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
+ n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
#endif
return (false);
}
// Don't allow non-machine nodes to be handled this way
@@ -2101,16 +2128,16 @@
const Pipeline *avail_pipeline = d->pipeline();
// Don't allow safepoints in the branch shadow, that will
// cause a number of difficulties
if ( avail_pipeline->instructionCount() == 1 &&
- !avail_pipeline->hasMultipleBundles() &&
- !avail_pipeline->hasBranchDelay() &&
- Pipeline::instr_has_unit_size() &&
- d->size(_regalloc) == Pipeline::instr_unit_size() &&
- NodeFitsInBundle(d) &&
- !node_bundling(d)->used_in_delay()) {
+ !avail_pipeline->hasMultipleBundles() &&
+ !avail_pipeline->hasBranchDelay() &&
+ Pipeline::instr_has_unit_size() &&
+ d->size(_regalloc) == Pipeline::instr_unit_size() &&
+ NodeFitsInBundle(d) &&
+ !node_bundling(d)->used_in_delay()) {
if (d->is_Mach() && !d->is_MachSafePoint()) {
// A node that fits in the delay slot was found, so we need to
// set the appropriate bits in the bundle pipeline information so
// that it correctly indicates resource usage. Later, when we
@@ -2151,17 +2178,17 @@
// See if the instruction in the delay slot requires a
// step of the bundles
if (!NodeFitsInBundle(n)) {
#ifndef PRODUCT
- if (_cfg->C->trace_opto_output())
- tty->print("# *** STEP(branch won't fit) ***\n");
+ if (_cfg->C->trace_opto_output())
+ tty->print("# *** STEP(branch won't fit) ***\n");
#endif
- // Update the state information
- _bundle_instr_count = 0;
- _bundle_cycle_number += 1;
- _bundle_use.step(1);
+ // Update the state information
+ _bundle_instr_count = 0;
+ _bundle_cycle_number += 1;
+ _bundle_use.step(1);
}
}
// Get the number of instructions
uint instruction_count = node_pipeline->instructionCount();
@@ -2203,12 +2230,12 @@
else if (instruction_count + _bundle_instr_count > Pipeline::_max_instrs_per_cycle) {
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
tty->print("# *** STEP(%d >= %d instructions) ***\n",
- instruction_count + _bundle_instr_count,
- Pipeline::_max_instrs_per_cycle);
+ instruction_count + _bundle_instr_count,
+ Pipeline::_max_instrs_per_cycle);
#endif
step(1);
}
}
@@ -2410,11 +2437,11 @@
last->as_Mach()->ideal_Opcode() == Op_Con) {
last = bb->get_node(--_bb_end);
}
assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, "");
if( last->is_Catch() ||
- (last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
+ (last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
// There might be a prior call. Skip it.
while (_bb_start < _bb_end && bb->get_node(--_bb_end)->is_MachProj());
} else if( last->is_MachNullCheck() ) {
// Backup so the last null-checked memory instruction is
// outside the schedulable range. Skip over the nullcheck,
@@ -2480,11 +2507,11 @@
}
}
}
#endif
#ifdef ASSERT
- verify_good_schedule(bb,"after block local scheduling");
+ verify_good_schedule(bb,"after block local scheduling");
#endif
}
#ifndef PRODUCT
if (_cfg->C->trace_opto_output())
@@ -2828,35 +2855,35 @@
// wired into the graph because the register is never
// used or def'ed in the block.
//
void Scheduling::garbage_collect_pinch_nodes() {
#ifndef PRODUCT
- if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
+ if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
#endif
- int trace_cnt = 0;
- for (uint k = 0; k < _reg_node.Size(); k++) {
- Node* pinch = _reg_node[k];
- if ((pinch != NULL) && pinch->Opcode() == Op_Node &&
- // no predecence input edges
- (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
- cleanup_pinch(pinch);
- _pinch_free_list.push(pinch);
- _reg_node.map(k, NULL);
+ int trace_cnt = 0;
+ for (uint k = 0; k < _reg_node.Size(); k++) {
+ Node* pinch = _reg_node[k];
+ if ((pinch != NULL) && pinch->Opcode() == Op_Node &&
+ // no predecence input edges
+ (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
+ cleanup_pinch(pinch);
+ _pinch_free_list.push(pinch);
+ _reg_node.map(k, NULL);
#ifndef PRODUCT
- if (_cfg->C->trace_opto_output()) {
- trace_cnt++;
- if (trace_cnt > 40) {
- tty->print("\n");
- trace_cnt = 0;
- }
- tty->print(" %d", pinch->_idx);
+ if (_cfg->C->trace_opto_output()) {
+ trace_cnt++;
+ if (trace_cnt > 40) {
+ tty->print("\n");
+ trace_cnt = 0;
}
-#endif
+ tty->print(" %d", pinch->_idx);
}
+#endif
}
+ }
#ifndef PRODUCT
- if (_cfg->C->trace_opto_output()) tty->print("\n");
+ if (_cfg->C->trace_opto_output()) tty->print("\n");
#endif
}
// Clean up a pinch node for reuse.
void Scheduling::cleanup_pinch( Node *pinch ) {
@@ -2889,23 +2916,23 @@
// Print Scheduling Statistics
void Scheduling::print_statistics() {
// Print the size added by nops for bundling
tty->print("Nops added %d bytes to total of %d bytes",
- _total_nop_size, _total_method_size);
+ _total_nop_size, _total_method_size);
if (_total_method_size > 0)
tty->print(", for %.2f%%",
- ((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
+ ((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
tty->print("\n");
// Print the number of branch shadows filled
if (Pipeline::_branch_has_delay_slot) {
tty->print("Of %d branches, %d had unconditional delay slots filled",
- _total_branches, _total_unconditional_delays);
+ _total_branches, _total_unconditional_delays);
if (_total_branches > 0)
tty->print(", for %.2f%%",
- ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
+ ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
tty->print("\n");
}
uint total_instructions = 0, total_bundles = 0;
@@ -2915,8 +2942,8 @@
total_bundles += bundle_count;
}
if (total_bundles > 0)
tty->print("Average ILP (excluding nops) is %.2f\n",
- ((double)total_instructions) / ((double)total_bundles));
+ ((double)total_instructions) / ((double)total_bundles));
}
#endif
< prev index next >