51 #define DEBUG_ARG(x)
52 #endif
53
54 extern int emit_exception_handler(CodeBuffer &cbuf);
55 extern int emit_deopt_handler(CodeBuffer &cbuf);
56
57 //------------------------------Output-----------------------------------------
58 // Convert Nodes to instruction bits and pass off to the VM
59 void Compile::Output() {
60 // The RootNode's block must still be empty at this point
61 assert( _cfg->_broot->_nodes.size() == 0, "" );
62
63 // The number of new nodes (mostly MachNop) is proportional to
64 // the number of java calls and inner loops which are aligned.
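// For instance (illustrative arithmetic only), a method with 10 Java calls and
// 2 aligned inner loops would budget roughly
// NodeLimitFudgeFactor + 10*3 + 2*(OptoLoopAlignment-1) extra nodes before bailing out.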
65 if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
66 C->inner_loops()*(OptoLoopAlignment-1)),
67 "out of nodes before code generation" ) ) {
68 return;
69 }
70 // Make sure I can find the Start Node
71 Block_Array& bbs = _cfg->_bbs;
72 Block *entry = _cfg->_blocks[1];
73 Block *broot = _cfg->_broot;
74
75 const StartNode *start = entry->_nodes[0]->as_Start();
76
77 // Replace StartNode with prolog
78 MachPrologNode *prolog = new (this) MachPrologNode();
79 entry->_nodes.map( 0, prolog );
80 bbs.map( prolog->_idx, entry );
81 bbs.map( start->_idx, NULL ); // start is no longer in any block
82
83 // Virtual methods need an unverified entry point
84
85 if( is_osr_compilation() ) {
86 if( PoisonOSREntry ) {
87 // TODO: Should use a ShouldNotReachHereNode...
88 _cfg->insert( broot, 0, new (this) MachBreakpointNode() );
89 }
90 } else {
91 if( _method && !_method->flags().is_static() ) {
92 // Insert the unverified entry point
93 _cfg->insert( broot, 0, new (this) MachUEPNode() );
94 }
95
96 }
97
98
99 // Break before main entry point
100 if( (_method && _method->break_at_execute())
101 #ifndef PRODUCT
102 ||(OptoBreakpoint && is_method_compilation())
103 ||(OptoBreakpointOSR && is_osr_compilation())
104 ||(OptoBreakpointC2R && !_method)
105 #endif
106 ) {
107 // checking for _method means that OptoBreakpoint does not apply to
108 // runtime stubs or frame converters
109 _cfg->insert( entry, 1, new (this) MachBreakpointNode() );
110 }
111
112 // Insert epilogs before every return
113 for( uint i=0; i<_cfg->_num_blocks; i++ ) {
114 Block *b = _cfg->_blocks[i];
115 if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
116 Node *m = b->end();
117 if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) {
118 MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
119 b->add_inst( epilog );
120 bbs.map(epilog->_idx, b);
121 //_regalloc->set_bad(epilog->_idx); // Already initialized this way.
122 }
123 }
124 }
125
126 # ifdef ENABLE_ZAP_DEAD_LOCALS
127 if ( ZapDeadCompiledLocals ) Insert_zap_nodes();
128 # endif
129
130 uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
131 blk_starts[0] = 0;
132
133 // Initialize code buffer and process short branches.
134 CodeBuffer* cb = init_buffer(blk_starts);
135
136 if (cb == NULL || failing()) return;
137
138 ScheduleAndBundle();
139
140 #ifndef PRODUCT
141 if (trace_opto_output()) {
235 if ( insert ) { // it is MachSafePoint
236 if ( !n->is_MachCall() ) {
237 insert = false;
238 } else { // n is a MachCall
239 MachCallNode* call = n->as_MachCall();
240 if (call->entry_point() == OptoRuntime::new_instance_Java() ||
241 call->entry_point() == OptoRuntime::new_array_Java() ||
242 call->entry_point() == OptoRuntime::multianewarray2_Java() ||
243 call->entry_point() == OptoRuntime::multianewarray3_Java() ||
244 call->entry_point() == OptoRuntime::multianewarray4_Java() ||
245 call->entry_point() == OptoRuntime::multianewarray5_Java() ||
246 call->entry_point() == OptoRuntime::slow_arraycopy_Java() ||
247 call->entry_point() == OptoRuntime::complete_monitor_locking_Java()
248 ) {
249 insert = false;
250 }
251 }
252 if (insert) {
253 Node *zap = call_zap_node(n->as_MachSafePoint(), i);
254 b->_nodes.insert( j, zap );
255 _cfg->_bbs.map( zap->_idx, b );
256 ++j;
257 }
258 }
259 }
260 }
261 }
262
263
264 Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) {
265 const TypeFunc *tf = OptoRuntime::zap_dead_locals_Type();
266 CallStaticJavaNode* ideal_node =
267 new (this) CallStaticJavaNode( tf,
268 OptoRuntime::zap_dead_locals_stub(_method->flags().is_native()),
269 "call zap dead locals stub", 0, TypePtr::BOTTOM);
270 // We need to copy the OopMap from the site we're zapping at.
271 // We have to make a copy, because the zap site might not be
272 // a call site, and zap_dead is a call site.
273 OopMap* clone = node_to_check->oop_map()->deep_copy();
274
275 // Add the cloned OopMap to the zap node
1217 }
1218
1219 // ------------------
1220 // Now fill in the code buffer
1221 Node *delay_slot = NULL;
1222
1223 for (uint i=0; i < nblocks; i++) {
1224 Block *b = _cfg->_blocks[i];
1225
1226 Node *head = b->head();
1227
1228 // If this block needs to start aligned (i.e., can be reached other
1229 // than by falling-thru from the previous block), then force the
1230 // start of a new bundle.
1231 if (Pipeline::requires_bundling() && starts_bundle(head))
1232 cb->flush_bundle(true);
1233
1234 #ifdef ASSERT
1235 if (!b->is_connector()) {
1236 stringStream st;
1237 b->dump_head(&_cfg->_bbs, &st);
1238 MacroAssembler(cb).block_comment(st.as_string());
1239 }
1240 jmp_target[i] = 0;
1241 jmp_offset[i] = 0;
1242 jmp_size[i] = 0;
1243 jmp_rule[i] = 0;
1244 #endif
1245 int blk_offset = current_offset;
1246
1247 // Define the label at the beginning of the basic block
1248 MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
1249
1250 uint last_inst = b->_nodes.size();
1251
1252 // Emit block normally, except for last instruction.
1253 // Emit means "dump code bits into code buffer".
1254 for (uint j = 0; j<last_inst; j++) {
1255
1256 // Get the node
1257 Node* n = b->_nodes[j];
1293
1294 // align the instruction if necessary
1295 int padding = mach->compute_padding(current_offset);
1296 // Make sure safepoint node for polling is distinct from a call's
1297 // return by adding a nop if needed.
1298 if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) {
1299 padding = nop_size;
1300 }
1301 if (padding == 0 && mach->avoid_back_to_back() &&
1302 current_offset == last_avoid_back_to_back_offset) {
1303 // Keep certain instructions from being emitted back to back; pad with a nop.
1304 padding = nop_size;
1305 }
1306
1307 if(padding > 0) {
1308 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
1309 int nops_cnt = padding / nop_size;
1310 MachNode *nop = new (this) MachNopNode(nops_cnt);
1311 b->_nodes.insert(j++, nop);
1312 last_inst++;
1313 _cfg->_bbs.map( nop->_idx, b );
1314 nop->emit(*cb, _regalloc);
1315 cb->flush_bundle(true);
1316 current_offset = cb->insts_size();
1317 }
1318
1319 // Remember the start of the last call in a basic block
1320 if (is_mcall) {
1321 MachCallNode *mcall = mach->as_MachCall();
1322
1323 // This destination address is NOT PC-relative
1324 mcall->method_set((intptr_t)mcall->entry_point());
1325
1326 // Save the return address
1327 call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset();
1328
1329 if (mcall->is_MachCallLeaf()) {
1330 is_mcall = false;
1331 is_sfn = false;
1332 }
1333 }
1378 // between calculated and final offsets of current block.
1379 offset -= (blk_starts[i] - blk_offset);
1380 }
1381 // In the following code a nop could be inserted before
1382 // the branch which will increase the backward distance.
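// (offset <= 0 here means a backward branch, so a nop inserted in front of it
// would only make the displacement more negative; subtracting nop_size keeps
// the estimate conservative.)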
1383 bool needs_padding = (current_offset == last_avoid_back_to_back_offset);
1384 if (needs_padding && offset <= 0)
1385 offset -= nop_size;
1386
1387 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
1388 // We've got a winner. Replace this branch.
1389 MachNode* replacement = mach->as_MachBranch()->short_branch_version(this);
1390
1391 // Update the jmp_size.
1392 int new_size = replacement->size(_regalloc);
1393 assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller");
1394 // Insert padding between avoid_back_to_back branches.
1395 if (needs_padding && replacement->avoid_back_to_back()) {
1396 MachNode *nop = new (this) MachNopNode();
1397 b->_nodes.insert(j++, nop);
1398 _cfg->_bbs.map(nop->_idx, b);
1399 last_inst++;
1400 nop->emit(*cb, _regalloc);
1401 cb->flush_bundle(true);
1402 current_offset = cb->insts_size();
1403 }
1404 #ifdef ASSERT
1405 jmp_target[i] = block_num;
1406 jmp_offset[i] = current_offset - blk_offset;
1407 jmp_size[i] = new_size;
1408 jmp_rule[i] = mach->rule();
1409 #endif
1410 b->_nodes.map(j, replacement);
1411 mach->subsume_by(replacement, C);
1412 n = replacement;
1413 mach = replacement;
1414 }
1415 }
1416 mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num );
1417 } else if (mach->ideal_Opcode() == Op_Jump) {
1418 for (uint h = 0; h < b->_num_succs; h++) {
1532 Process_OopMap_Node(mach, adjusted_offset);
1533 }
1534
1535 // Insert the delay slot instruction
1536 delay_slot->emit(*cb, _regalloc);
1537
1538 // Don't reuse it
1539 delay_slot = NULL;
1540 }
1541
1542 } // End for all instructions in block
1543
1544 // If the next block is the top of a loop, pad this block out to align
1545 // the loop top a little. Helps prevent pipe stalls at loop back branches.
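// (alignment_padding() returns the number of bytes needed to bring the next
// block up to its required alignment; below it is emitted as a single
// MachNopNode covering padding/nop_size nop instructions.)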
1546 if (i < nblocks-1) {
1547 Block *nb = _cfg->_blocks[i+1];
1548 int padding = nb->alignment_padding(current_offset);
1549 if( padding > 0 ) {
1550 MachNode *nop = new (this) MachNopNode(padding / nop_size);
1551 b->_nodes.insert( b->_nodes.size(), nop );
1552 _cfg->_bbs.map( nop->_idx, b );
1553 nop->emit(*cb, _regalloc);
1554 current_offset = cb->insts_size();
1555 }
1556 }
1557 // Verify that the distances previously computed for forward
1558 // short branches are still valid.
1559 guarantee((int)(blk_starts[i+1] - blk_starts[i]) >= (current_offset - blk_offset), "shouldn't increase block size");
1560
1561 // Save new block start offset
1562 blk_starts[i] = blk_offset;
1563 } // End of for all blocks
1564 blk_starts[nblocks] = current_offset;
1565
1566 non_safepoints.flush_at_end();
1567
1568 // Offset too large?
1569 if (failing()) return;
1570
1571 // Define a pseudo-label at the end of the code
1572 MacroAssembler(cb).bind( blk_labels[nblocks] );
1720 _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() );
1721 continue;
1722 }
1723 } // End of for all blocks fill in exception table entries
1724 }
1725
1726 // Static Variables
1727 #ifndef PRODUCT
1728 uint Scheduling::_total_nop_size = 0;
1729 uint Scheduling::_total_method_size = 0;
1730 uint Scheduling::_total_branches = 0;
1731 uint Scheduling::_total_unconditional_delays = 0;
1732 uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];
1733 #endif
1734
1735 // Initializer for class Scheduling
1736
1737 Scheduling::Scheduling(Arena *arena, Compile &compile)
1738 : _arena(arena),
1739 _cfg(compile.cfg()),
1740 _bbs(compile.cfg()->_bbs),
1741 _regalloc(compile.regalloc()),
1742 _reg_node(arena),
1743 _bundle_instr_count(0),
1744 _bundle_cycle_number(0),
1745 _scheduled(arena),
1746 _available(arena),
1747 _next_node(NULL),
1748 _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]),
1749 _pinch_free_list(arena)
1750 #ifndef PRODUCT
1751 , _branches(0)
1752 , _unconditional_delays(0)
1753 #endif
1754 {
1755 // Create a MachNopNode
1756 _nop = new (&compile) MachNopNode();
1757
1758 // Now that the nops are in the array, save the count
1759 // (but allow entries for the nops)
1760 _node_bundling_limit = compile.unique();
2068 }
2069 }
2070
2071 // Insert the node in the available list
2072 _available.insert(i, n);
2073
2074 #ifndef PRODUCT
2075 if (_cfg->C->trace_opto_output())
2076 dump_available();
2077 #endif
2078 }
2079
2080 //------------------------------DecrementUseCounts-----------------------------
2081 void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
2082 for ( uint i=0; i < n->len(); i++ ) {
2083 Node *def = n->in(i);
2084 if (!def) continue;
2085 if( def->is_Proj() ) // If this is a machine projection, then
2086 def = def->in(0); // propagate usage thru to the base instruction
2087
2088 if( _bbs[def->_idx] != bb ) // Ignore if not block-local
2089 continue;
2090
2091 // Compute the latency
2092 uint l = _bundle_cycle_number + n->latency(i);
2093 if (_current_latency[def->_idx] < l)
2094 _current_latency[def->_idx] = l;
2095
2096 // If this does not have uses then schedule it
2097 if ((--_uses[def->_idx]) == 0)
2098 AddNodeToAvailableList(def);
2099 }
2100 }
2101
2102 //------------------------------AddNodeToBundle--------------------------------
2103 void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
2104 #ifndef PRODUCT
2105 if (_cfg->C->trace_opto_output()) {
2106 tty->print("# AddNodeToBundle: ");
2107 n->dump();
2108 }
2109 #endif
2341 #endif
2342
2343 // Force the _uses count to never go to zero for unschedulable pieces
2344 // of the block
2345 for( uint k = 0; k < _bb_start; k++ )
2346 _uses[bb->_nodes[k]->_idx] = 1;
2347 for( uint l = _bb_end; l < bb->_nodes.size(); l++ )
2348 _uses[bb->_nodes[l]->_idx] = 1;
2349
2350 // Iterate backwards over the instructions in the block. Don't count the
2351 // branch projections at end or the block header instructions.
2352 for( uint j = _bb_end-1; j >= _bb_start; j-- ) {
2353 Node *n = bb->_nodes[j];
2354 if( n->is_Proj() ) continue; // Projections handled another way
2355
2356 // Account for all uses
2357 for ( uint k = 0; k < n->len(); k++ ) {
2358 Node *inp = n->in(k);
2359 if (!inp) continue;
2360 assert(inp != n, "no cycles allowed" );
2361 if( _bbs[inp->_idx] == bb ) { // Block-local use?
2362 if( inp->is_Proj() ) // Skip through Proj's
2363 inp = inp->in(0);
2364 ++_uses[inp->_idx]; // Count 1 block-local use
2365 }
2366 }
2367
2368 // If this instruction has a 0 use count, then it is available
2369 if (!_uses[n->_idx]) {
2370 _current_latency[n->_idx] = _bundle_cycle_number;
2371 AddNodeToAvailableList(n);
2372 }
2373
2374 #ifndef PRODUCT
2375 if (_cfg->C->trace_opto_output()) {
2376 tty->print("# uses: %3d: ", _uses[n->_idx]);
2377 n->dump();
2378 }
2379 #endif
2380 }
2381
2382 #ifndef PRODUCT
2383 if (_cfg->C->trace_opto_output())
2626 }
2627 #endif
2628
2629 // Conditionally add precedence edges. Avoid putting edges on Projs.
2630 static void add_prec_edge_from_to( Node *from, Node *to ) {
2631 if( from->is_Proj() ) { // Put precedence edge on Proj's input
2632 assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" );
2633 from = from->in(0);
2634 }
2635 if( from != to && // No cycles (for things like LD L0,[L0+4] )
2636 !edge_from_to( from, to ) ) // Avoid duplicate edge
2637 from->add_prec(to);
2638 }
2639
2640 //------------------------------anti_do_def------------------------------------
2641 void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
2642 if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
2643 return;
2644
2645 Node *pinch = _reg_node[def_reg]; // Get pinch point
2646 if( !pinch || _bbs[pinch->_idx] != b || // No pinch-point yet?
2647 is_def ) { // Check for a true def (not a kill)
2648 _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point
2649 return;
2650 }
2651
2652 Node *kill = def; // Rename 'def' to more descriptive 'kill'
2653 debug_only( def = (Node*)0xdeadbeef; )
2654
2655 // After some number of kills there _may_ be a later def
2656 Node *later_def = NULL;
2657
2658 // Finding a kill requires a real pinch-point.
2659 // Check for not already having a pinch-point.
2660 // Pinch points are Op_Node's.
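// (Roughly: the pinch point acts as a shared fan-in/fan-out node, so N uses
// and M kills of the same register need only N+M precedence edges to and from
// the pinch point instead of N*M direct use->kill edges.)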
2661 if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point?
2662 later_def = pinch; // Must be def/kill as optimistic pinch-point
2663 if ( _pinch_free_list.size() > 0) {
2664 pinch = _pinch_free_list.pop();
2665 } else {
2666 pinch = new (_cfg->C) Node(1); // Pinch point to-be
2667 }
2668 if (pinch->_idx >= _regalloc->node_regs_max_index()) {
2669 _cfg->C->record_method_not_compilable("too many D-U pinch points");
2670 return;
2671 }
2672 _bbs.map(pinch->_idx,b); // Pretend it's valid in this block (lazy init)
2673 _reg_node.map(def_reg,pinch); // Record pinch-point
2674 //_regalloc->set_bad(pinch->_idx); // Already initialized this way.
2675 if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill
2676 pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call
2677 add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch
2678 later_def = NULL; // and no later def
2679 }
2680 pinch->set_req(0,later_def); // Hook later def so we can find it
2681 } else { // Else have valid pinch point
2682 if( pinch->in(0) ) // If there is a later-def
2683 later_def = pinch->in(0); // Get it
2684 }
2685
2686 // Add output-dependence edge from later def to kill
2687 if( later_def ) // If there is some original def
2688 add_prec_edge_from_to(later_def,kill); // Add edge from def to kill
2689
2690 // See if current kill is also a use, and so is forced to be the pinch-point.
2691 if( pinch->Opcode() == Op_Node ) {
2692 Node *uses = kill->is_Proj() ? kill->in(0) : kill;
2696 // Yes, found a use/kill pinch-point
2697 pinch->set_req(0,NULL); //
2698 pinch->replace_by(kill); // Move anti-dep edges up
2699 pinch = kill;
2700 _reg_node.map(def_reg,pinch);
2701 return;
2702 }
2703 }
2704 }
2705
2706 // Add edge from kill to pinch-point
2707 add_prec_edge_from_to(kill,pinch);
2708 }
2709
2710 //------------------------------anti_do_use------------------------------------
2711 void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
2712 if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow
2713 return;
2714 Node *pinch = _reg_node[use_reg]; // Get pinch point
2715 // Check for no later def_reg/kill in block
2716 if( pinch && _bbs[pinch->_idx] == b &&
2717 // Use has to be block-local as well
2718 _bbs[use->_idx] == b ) {
2719 if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?)
2720 pinch->req() == 1 ) { // pinch not yet in block?
2721 pinch->del_req(0); // yank pointer to later-def, also set flag
2722 // Insert the pinch-point in the block just after the last use
2723 b->_nodes.insert(b->find_node(use)+1,pinch);
2724 _bb_end++; // Increase the size of the scheduled region in the block
2725 }
2726
2727 add_prec_edge_from_to(pinch,use);
2728 }
2729 }
2730
2731 //------------------------------ComputeRegisterAntidependences-----------------
2732 // We insert antidependences between the reads and following write of
2733 // allocated registers to prevent illegal code motion. Hopefully, the
2734 // number of added references should be fairly small, especially as we
2735 // are only adding references within the current basic block.
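// Illustrative (hypothetical) example of such an antidependence:
//   use:  ADD  RAX, RBX    // reads RAX
//   kill: MOV  RAX, 0      // later redefinition of RAX in the same block
// An antidependence edge between the read and the later write keeps the
// scheduler from moving the redefinition above the read.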
2736 void Scheduling::ComputeRegisterAntidependencies(Block *b) {
2737
2738 #ifdef ASSERT
|
51 #define DEBUG_ARG(x)
52 #endif
53
54 extern int emit_exception_handler(CodeBuffer &cbuf);
55 extern int emit_deopt_handler(CodeBuffer &cbuf);
56
57 //------------------------------Output-----------------------------------------
58 // Convert Nodes to instruction bits and pass off to the VM
59 void Compile::Output() {
60 // The RootNode's block must still be empty at this point
61 assert( _cfg->_broot->_nodes.size() == 0, "" );
62
63 // The number of new nodes (mostly MachNop) is proportional to
64 // the number of java calls and inner loops which are aligned.
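// For instance (illustrative arithmetic only), a method with 10 Java calls and
// 2 aligned inner loops would budget roughly
// NodeLimitFudgeFactor + 10*3 + 2*(OptoLoopAlignment-1) extra nodes before bailing out.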
65 if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
66 C->inner_loops()*(OptoLoopAlignment-1)),
67 "out of nodes before code generation" ) ) {
68 return;
69 }
70 // Make sure I can find the Start Node
71 Block *entry = _cfg->_blocks[1];
72 Block *broot = _cfg->_broot;
73
74 const StartNode *start = entry->_nodes[0]->as_Start();
75
76 // Replace StartNode with prolog
77 MachPrologNode *prolog = new (this) MachPrologNode();
78 entry->_nodes.map( 0, prolog );
79 _cfg->map_node_to_block(prolog, entry);
80 _cfg->unmap_node_from_block(start); // start is no longer in any block
81
82 // Virtual methods need an unverified entry point
83
84 if( is_osr_compilation() ) {
85 if( PoisonOSREntry ) {
86 // TODO: Should use a ShouldNotReachHereNode...
87 _cfg->insert( broot, 0, new (this) MachBreakpointNode() );
88 }
89 } else {
90 if( _method && !_method->flags().is_static() ) {
91 // Insert the unverified entry point
92 _cfg->insert( broot, 0, new (this) MachUEPNode() );
93 }
94
95 }
96
97
98 // Break before main entry point
99 if( (_method && _method->break_at_execute())
100 #ifndef PRODUCT
101 ||(OptoBreakpoint && is_method_compilation())
102 ||(OptoBreakpointOSR && is_osr_compilation())
103 ||(OptoBreakpointC2R && !_method)
104 #endif
105 ) {
106 // checking for _method means that OptoBreakpoint does not apply to
107 // runtime stubs or frame converters
108 _cfg->insert( entry, 1, new (this) MachBreakpointNode() );
109 }
110
111 // Insert epilogs before every return
112 for( uint i=0; i<_cfg->_num_blocks; i++ ) {
113 Block *b = _cfg->_blocks[i];
114 if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
115 Node *m = b->end();
116 if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) {
117 MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
118 b->add_inst( epilog );
119 _cfg->map_node_to_block(epilog, b);
120 }
121 }
122 }
123
124 # ifdef ENABLE_ZAP_DEAD_LOCALS
125 if ( ZapDeadCompiledLocals ) Insert_zap_nodes();
126 # endif
127
128 uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
129 blk_starts[0] = 0;
130
131 // Initialize code buffer and process short branches.
132 CodeBuffer* cb = init_buffer(blk_starts);
133
134 if (cb == NULL || failing()) return;
135
136 ScheduleAndBundle();
137
138 #ifndef PRODUCT
139 if (trace_opto_output()) {
233 if ( insert ) { // it is MachSafePoint
234 if ( !n->is_MachCall() ) {
235 insert = false;
236 } else { // n is a MachCall
237 MachCallNode* call = n->as_MachCall();
238 if (call->entry_point() == OptoRuntime::new_instance_Java() ||
239 call->entry_point() == OptoRuntime::new_array_Java() ||
240 call->entry_point() == OptoRuntime::multianewarray2_Java() ||
241 call->entry_point() == OptoRuntime::multianewarray3_Java() ||
242 call->entry_point() == OptoRuntime::multianewarray4_Java() ||
243 call->entry_point() == OptoRuntime::multianewarray5_Java() ||
244 call->entry_point() == OptoRuntime::slow_arraycopy_Java() ||
245 call->entry_point() == OptoRuntime::complete_monitor_locking_Java()
246 ) {
247 insert = false;
248 }
249 }
250 if (insert) {
251 Node *zap = call_zap_node(n->as_MachSafePoint(), i);
252 b->_nodes.insert( j, zap );
253 _cfg->map_node_to_block(zap, b);
254 ++j;
255 }
256 }
257 }
258 }
259 }
260
261
262 Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) {
263 const TypeFunc *tf = OptoRuntime::zap_dead_locals_Type();
264 CallStaticJavaNode* ideal_node =
265 new (this) CallStaticJavaNode( tf,
266 OptoRuntime::zap_dead_locals_stub(_method->flags().is_native()),
267 "call zap dead locals stub", 0, TypePtr::BOTTOM);
268 // We need to copy the OopMap from the site we're zapping at.
269 // We have to make a copy, because the zap site might not be
270 // a call site, and zap_dead is a call site.
271 OopMap* clone = node_to_check->oop_map()->deep_copy();
272
273 // Add the cloned OopMap to the zap node
1215 }
1216
1217 // ------------------
1218 // Now fill in the code buffer
1219 Node *delay_slot = NULL;
1220
1221 for (uint i=0; i < nblocks; i++) {
1222 Block *b = _cfg->_blocks[i];
1223
1224 Node *head = b->head();
1225
1226 // If this block needs to start aligned (i.e., can be reached other
1227 // than by falling-thru from the previous block), then force the
1228 // start of a new bundle.
1229 if (Pipeline::requires_bundling() && starts_bundle(head))
1230 cb->flush_bundle(true);
1231
1232 #ifdef ASSERT
1233 if (!b->is_connector()) {
1234 stringStream st;
1235 b->dump_head(_cfg, &st);
1236 MacroAssembler(cb).block_comment(st.as_string());
1237 }
1238 jmp_target[i] = 0;
1239 jmp_offset[i] = 0;
1240 jmp_size[i] = 0;
1241 jmp_rule[i] = 0;
1242 #endif
1243 int blk_offset = current_offset;
1244
1245 // Define the label at the beginning of the basic block
1246 MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
1247
1248 uint last_inst = b->_nodes.size();
1249
1250 // Emit block normally, except for last instruction.
1251 // Emit means "dump code bits into code buffer".
1252 for (uint j = 0; j<last_inst; j++) {
1253
1254 // Get the node
1255 Node* n = b->_nodes[j];
1291
1292 // align the instruction if necessary
1293 int padding = mach->compute_padding(current_offset);
1294 // Make sure safepoint node for polling is distinct from a call's
1295 // return by adding a nop if needed.
1296 if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) {
1297 padding = nop_size;
1298 }
1299 if (padding == 0 && mach->avoid_back_to_back() &&
1300 current_offset == last_avoid_back_to_back_offset) {
1301 // Keep certain instructions from being emitted back to back; pad with a nop.
1302 padding = nop_size;
1303 }
1304
1305 if(padding > 0) {
1306 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
1307 int nops_cnt = padding / nop_size;
1308 MachNode *nop = new (this) MachNopNode(nops_cnt);
1309 b->_nodes.insert(j++, nop);
1310 last_inst++;
1311 _cfg->map_node_to_block(nop, b);
1312 nop->emit(*cb, _regalloc);
1313 cb->flush_bundle(true);
1314 current_offset = cb->insts_size();
1315 }
1316
1317 // Remember the start of the last call in a basic block
1318 if (is_mcall) {
1319 MachCallNode *mcall = mach->as_MachCall();
1320
1321 // This destination address is NOT PC-relative
1322 mcall->method_set((intptr_t)mcall->entry_point());
1323
1324 // Save the return address
1325 call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset();
1326
1327 if (mcall->is_MachCallLeaf()) {
1328 is_mcall = false;
1329 is_sfn = false;
1330 }
1331 }
1376 // between calculated and final offsets of current block.
1377 offset -= (blk_starts[i] - blk_offset);
1378 }
1379 // In the following code a nop could be inserted before
1380 // the branch which will increase the backward distance.
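// (offset <= 0 here means a backward branch, so a nop inserted in front of it
// would only make the displacement more negative; subtracting nop_size keeps
// the estimate conservative.)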
1381 bool needs_padding = (current_offset == last_avoid_back_to_back_offset);
1382 if (needs_padding && offset <= 0)
1383 offset -= nop_size;
1384
1385 if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
1386 // We've got a winner. Replace this branch.
1387 MachNode* replacement = mach->as_MachBranch()->short_branch_version(this);
1388
1389 // Update the jmp_size.
1390 int new_size = replacement->size(_regalloc);
1391 assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller");
1392 // Insert padding between avoid_back_to_back branches.
1393 if (needs_padding && replacement->avoid_back_to_back()) {
1394 MachNode *nop = new (this) MachNopNode();
1395 b->_nodes.insert(j++, nop);
1396 _cfg->map_node_to_block(nop, b);
1397 last_inst++;
1398 nop->emit(*cb, _regalloc);
1399 cb->flush_bundle(true);
1400 current_offset = cb->insts_size();
1401 }
1402 #ifdef ASSERT
1403 jmp_target[i] = block_num;
1404 jmp_offset[i] = current_offset - blk_offset;
1405 jmp_size[i] = new_size;
1406 jmp_rule[i] = mach->rule();
1407 #endif
1408 b->_nodes.map(j, replacement);
1409 mach->subsume_by(replacement, C);
1410 n = replacement;
1411 mach = replacement;
1412 }
1413 }
1414 mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num );
1415 } else if (mach->ideal_Opcode() == Op_Jump) {
1416 for (uint h = 0; h < b->_num_succs; h++) {
1530 Process_OopMap_Node(mach, adjusted_offset);
1531 }
1532
1533 // Insert the delay slot instruction
1534 delay_slot->emit(*cb, _regalloc);
1535
1536 // Don't reuse it
1537 delay_slot = NULL;
1538 }
1539
1540 } // End for all instructions in block
1541
1542 // If the next block is the top of a loop, pad this block out to align
1543 // the loop top a little. Helps prevent pipe stalls at loop back branches.
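// (alignment_padding() returns the number of bytes needed to bring the next
// block up to its required alignment; below it is emitted as a single
// MachNopNode covering padding/nop_size nop instructions.)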
1544 if (i < nblocks-1) {
1545 Block *nb = _cfg->_blocks[i+1];
1546 int padding = nb->alignment_padding(current_offset);
1547 if( padding > 0 ) {
1548 MachNode *nop = new (this) MachNopNode(padding / nop_size);
1549 b->_nodes.insert( b->_nodes.size(), nop );
1550 _cfg->map_node_to_block(nop, b);
1551 nop->emit(*cb, _regalloc);
1552 current_offset = cb->insts_size();
1553 }
1554 }
1555 // Verify that the distances previously computed for forward
1556 // short branches are still valid.
1557 guarantee((int)(blk_starts[i+1] - blk_starts[i]) >= (current_offset - blk_offset), "shouldn't increase block size");
1558
1559 // Save new block start offset
1560 blk_starts[i] = blk_offset;
1561 } // End of for all blocks
1562 blk_starts[nblocks] = current_offset;
1563
1564 non_safepoints.flush_at_end();
1565
1566 // Offset too large?
1567 if (failing()) return;
1568
1569 // Define a pseudo-label at the end of the code
1570 MacroAssembler(cb).bind( blk_labels[nblocks] );
1718 _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() );
1719 continue;
1720 }
1721 } // End of for all blocks fill in exception table entries
1722 }
1723
1724 // Static Variables
1725 #ifndef PRODUCT
1726 uint Scheduling::_total_nop_size = 0;
1727 uint Scheduling::_total_method_size = 0;
1728 uint Scheduling::_total_branches = 0;
1729 uint Scheduling::_total_unconditional_delays = 0;
1730 uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];
1731 #endif
1732
1733 // Initializer for class Scheduling
1734
1735 Scheduling::Scheduling(Arena *arena, Compile &compile)
1736 : _arena(arena),
1737 _cfg(compile.cfg()),
1738 _regalloc(compile.regalloc()),
1739 _reg_node(arena),
1740 _bundle_instr_count(0),
1741 _bundle_cycle_number(0),
1742 _scheduled(arena),
1743 _available(arena),
1744 _next_node(NULL),
1745 _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]),
1746 _pinch_free_list(arena)
1747 #ifndef PRODUCT
1748 , _branches(0)
1749 , _unconditional_delays(0)
1750 #endif
1751 {
1752 // Create a MachNopNode
1753 _nop = new (&compile) MachNopNode();
1754
1755 // Now that the nops are in the array, save the count
1756 // (but allow entries for the nops)
1757 _node_bundling_limit = compile.unique();
2065 }
2066 }
2067
2068 // Insert the node in the available list
2069 _available.insert(i, n);
2070
2071 #ifndef PRODUCT
2072 if (_cfg->C->trace_opto_output())
2073 dump_available();
2074 #endif
2075 }
2076
2077 //------------------------------DecrementUseCounts-----------------------------
2078 void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
2079 for ( uint i=0; i < n->len(); i++ ) {
2080 Node *def = n->in(i);
2081 if (!def) continue;
2082 if( def->is_Proj() ) // If this is a machine projection, then
2083 def = def->in(0); // propagate usage thru to the base instruction
2084
2085 if(_cfg->get_block_for_node(def) != bb) { // Ignore if not block-local
2086 continue;
2087 }
2088
2089 // Compute the latency
2090 uint l = _bundle_cycle_number + n->latency(i);
2091 if (_current_latency[def->_idx] < l)
2092 _current_latency[def->_idx] = l;
2093
2094 // If this does not have uses then schedule it
2095 if ((--_uses[def->_idx]) == 0)
2096 AddNodeToAvailableList(def);
2097 }
2098 }
2099
2100 //------------------------------AddNodeToBundle--------------------------------
2101 void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
2102 #ifndef PRODUCT
2103 if (_cfg->C->trace_opto_output()) {
2104 tty->print("# AddNodeToBundle: ");
2105 n->dump();
2106 }
2107 #endif
2339 #endif
2340
2341 // Force the _uses count to never go to zero for unschedulable pieces
2342 // of the block
2343 for( uint k = 0; k < _bb_start; k++ )
2344 _uses[bb->_nodes[k]->_idx] = 1;
2345 for( uint l = _bb_end; l < bb->_nodes.size(); l++ )
2346 _uses[bb->_nodes[l]->_idx] = 1;
2347
2348 // Iterate backwards over the instructions in the block. Don't count the
2349 // branch projections at end or the block header instructions.
2350 for( uint j = _bb_end-1; j >= _bb_start; j-- ) {
2351 Node *n = bb->_nodes[j];
2352 if( n->is_Proj() ) continue; // Projections handled another way
2353
2354 // Account for all uses
2355 for ( uint k = 0; k < n->len(); k++ ) {
2356 Node *inp = n->in(k);
2357 if (!inp) continue;
2358 assert(inp != n, "no cycles allowed" );
2359 if (_cfg->get_block_for_node(inp) == bb) { // Block-local use?
2360 if (inp->is_Proj()) { // Skip through Proj's
2361 inp = inp->in(0);
2362 }
2363 ++_uses[inp->_idx]; // Count 1 block-local use
2364 }
2365 }
2366
2367 // If this instruction has a 0 use count, then it is available
2368 if (!_uses[n->_idx]) {
2369 _current_latency[n->_idx] = _bundle_cycle_number;
2370 AddNodeToAvailableList(n);
2371 }
2372
2373 #ifndef PRODUCT
2374 if (_cfg->C->trace_opto_output()) {
2375 tty->print("# uses: %3d: ", _uses[n->_idx]);
2376 n->dump();
2377 }
2378 #endif
2379 }
2380
2381 #ifndef PRODUCT
2382 if (_cfg->C->trace_opto_output())
2625 }
2626 #endif
2627
2628 // Conditionally add precedence edges. Avoid putting edges on Projs.
2629 static void add_prec_edge_from_to( Node *from, Node *to ) {
2630 if( from->is_Proj() ) { // Put precedence edge on Proj's input
2631 assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" );
2632 from = from->in(0);
2633 }
2634 if( from != to && // No cycles (for things like LD L0,[L0+4] )
2635 !edge_from_to( from, to ) ) // Avoid duplicate edge
2636 from->add_prec(to);
2637 }
2638
2639 //------------------------------anti_do_def------------------------------------
2640 void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
2641 if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
2642 return;
2643
2644 Node *pinch = _reg_node[def_reg]; // Get pinch point
2645 if( !pinch || _cfg->get_block_for_node(pinch) != b || // No pinch-point yet?
2646 is_def ) { // Check for a true def (not a kill)
2647 _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point
2648 return;
2649 }
2650
2651 Node *kill = def; // Rename 'def' to more descriptive 'kill'
2652 debug_only( def = (Node*)0xdeadbeef; )
2653
2654 // After some number of kills there _may_ be a later def
2655 Node *later_def = NULL;
2656
2657 // Finding a kill requires a real pinch-point.
2658 // Check for not already having a pinch-point.
2659 // Pinch points are Op_Node's.
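// (Roughly: the pinch point acts as a shared fan-in/fan-out node, so N uses
// and M kills of the same register need only N+M precedence edges to and from
// the pinch point instead of N*M direct use->kill edges.)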
2660 if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point?
2661 later_def = pinch; // Must be def/kill as optimistic pinch-point
2662 if ( _pinch_free_list.size() > 0) {
2663 pinch = _pinch_free_list.pop();
2664 } else {
2665 pinch = new (_cfg->C) Node(1); // Pinch point to-be
2666 }
2667 if (pinch->_idx >= _regalloc->node_regs_max_index()) {
2668 _cfg->C->record_method_not_compilable("too many D-U pinch points");
2669 return;
2670 }
2671 _cfg->map_node_to_block(pinch, b); // Pretend it's valid in this block (lazy init)
2672 _reg_node.map(def_reg,pinch); // Record pinch-point
2673 //_regalloc->set_bad(pinch->_idx); // Already initialized this way.
2674 if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill
2675 pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call
2676 add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch
2677 later_def = NULL; // and no later def
2678 }
2679 pinch->set_req(0,later_def); // Hook later def so we can find it
2680 } else { // Else have valid pinch point
2681 if( pinch->in(0) ) // If there is a later-def
2682 later_def = pinch->in(0); // Get it
2683 }
2684
2685 // Add output-dependence edge from later def to kill
2686 if( later_def ) // If there is some original def
2687 add_prec_edge_from_to(later_def,kill); // Add edge from def to kill
2688
2689 // See if current kill is also a use, and so is forced to be the pinch-point.
2690 if( pinch->Opcode() == Op_Node ) {
2691 Node *uses = kill->is_Proj() ? kill->in(0) : kill;
2695 // Yes, found a use/kill pinch-point
2696 pinch->set_req(0,NULL); //
2697 pinch->replace_by(kill); // Move anti-dep edges up
2698 pinch = kill;
2699 _reg_node.map(def_reg,pinch);
2700 return;
2701 }
2702 }
2703 }
2704
2705 // Add edge from kill to pinch-point
2706 add_prec_edge_from_to(kill,pinch);
2707 }
2708
2709 //------------------------------anti_do_use------------------------------------
2710 void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
2711 if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow
2712 return;
2713 Node *pinch = _reg_node[use_reg]; // Get pinch point
2714 // Check for no later def_reg/kill in block
2715 if( pinch && _cfg->get_block_for_node(pinch) == b &&
2716 // Use has to be block-local as well
2717 _cfg->get_block_for_node(use) == b) {
2718 if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?)
2719 pinch->req() == 1 ) { // pinch not yet in block?
2720 pinch->del_req(0); // yank pointer to later-def, also set flag
2721 // Insert the pinch-point in the block just after the last use
2722 b->_nodes.insert(b->find_node(use)+1,pinch);
2723 _bb_end++; // Increase the size of the scheduled region in the block
2724 }
2725
2726 add_prec_edge_from_to(pinch,use);
2727 }
2728 }
2729
2730 //------------------------------ComputeRegisterAntidependences-----------------
2731 // We insert antidependences between the reads and following write of
2732 // allocated registers to prevent illegal code motion. Hopefully, the
2733 // number of added references should be fairly small, especially as we
2734 // are only adding references within the current basic block.
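// Illustrative (hypothetical) example of such an antidependence:
//   use:  ADD  RAX, RBX    // reads RAX
//   kill: MOV  RAX, 0      // later redefinition of RAX in the same block
// An antidependence edge between the read and the later write keeps the
// scheduler from moving the redefinition above the read.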
2735 void Scheduling::ComputeRegisterAntidependencies(Block *b) {
2736
2737 #ifdef ASSERT
|