28 extern uint size_java_to_interp();
29 extern uint reloc_java_to_interp();
30 extern uint size_exception_handler();
31 extern uint size_deopt_handler();
32
33 #ifndef PRODUCT
34 #define DEBUG_ARG(x) , x
35 #else
36 #define DEBUG_ARG(x)
37 #endif
38
39 extern int emit_exception_handler(CodeBuffer &cbuf);
40 extern int emit_deopt_handler(CodeBuffer &cbuf);
41
42 //------------------------------Output-----------------------------------------
43 // Convert Nodes to instruction bits and pass off to the VM
44 void Compile::Output() {
45 // RootNode's block must be empty here; the RootNode itself emits no code.
46 assert( _cfg->_broot->_nodes.size() == 0, "" );
47
48 // Initialize the space for the BufferBlob used to find and verify
49 // instruction size in MachNode::emit_size()
50 init_scratch_buffer_blob();
51 if (failing()) return; // Out of memory
52
53 // The number of new nodes (mostly MachNop) is proportional to
54 // the number of java calls and inner loops which are aligned.
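   // (Illustrative arithmetic, assuming OptoLoopAlignment == 16: a method
   //  with 10 Java calls and 4 aligned inner loops budgets
   //  NodeLimitFudgeFactor + 10*3 + 4*15 == NodeLimitFudgeFactor + 90
   //  extra nodes.)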
55 if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
56 C->inner_loops()*(OptoLoopAlignment-1)),
57 "out of nodes before code generation" ) ) {
58 return;
59 }
60 // Make sure I can find the Start Node
61 Block_Array& bbs = _cfg->_bbs;
62 Block *entry = _cfg->_blocks[1];
63 Block *broot = _cfg->_broot;
64
65 const StartNode *start = entry->_nodes[0]->as_Start();
66
67 // Replace StartNode with prolog
68 MachPrologNode *prolog = new (this) MachPrologNode();
69 entry->_nodes.map( 0, prolog );
70 bbs.map( prolog->_idx, entry );
71 bbs.map( start->_idx, NULL ); // start is no longer in any block
72
300 // block(s) does not have enough instructions.
301 Block *nb = b;
302 while( inst_cnt > 0 &&
303 i < last_block &&
304 !_cfg->_blocks[i+1]->has_loop_alignment() &&
305 !nb->has_successor(b) ) {
306 i++;
307 nb = _cfg->_blocks[i];
308 inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
309 } // while( inst_cnt > 0 && i < last_block )
310
311 b->set_first_inst_size(sum_size);
312 } // if( b->head()->is_Loop() )
313 } // for( i <= last_block )
314 } // if( MaxLoopPad < OptoLoopAlignment-1 )
315 }
316
317 //----------------------Shorten_branches---------------------------------------
318 // The architecture description provides short branch variants for some long
319 // branch instructions. Replace eligible long branches with short branches.
320 void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size, int& const_size) {
321
322 // fill in the nop array for bundling computations
323 MachNode *_nop_list[Bundle::_nop_count];
324 Bundle::initialize_nops(_nop_list, this);
325
326 // ------------------
327 // Compute size of each block, method size, and relocation information size
328 uint *jmp_end = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks);
329 uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
330 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
331 DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
332 blk_starts[0] = 0;
333
334 // Initialize the sizes to 0
335 code_size = 0; // Size in bytes of generated code
336 stub_size = 0; // Size in bytes of all stub entries
337 // Size in bytes of all relocation entries, including those in local stubs.
338 // Start with 2 bytes of reloc info for the unvalidated entry point
339 reloc_size = 1; // Number of relocation entries
340 const_size = 0; // size of fp constants in words
341
342 // Make three passes. The first computes pessimistic blk_starts,
343 // relative jmp_end, reloc_size and const_size information.
344 // The second performs short branch substitution using the pessimistic
345 // sizing. The third inserts nops where needed.
346
347 Node *nj; // tmp
348
349 // Step one, perform a pessimistic sizing pass.
350 uint i;
351 uint min_offset_from_last_call = 1; // init to a positive value
352 uint nop_size = (new (this) MachNopNode())->size(_regalloc);
353 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
354 Block *b = _cfg->_blocks[i];
355
356 // Sum all instruction sizes to compute block size
357 uint last_inst = b->_nodes.size();
358 uint blk_size = 0;
359 for( uint j = 0; j<last_inst; j++ ) {
360 nj = b->_nodes[j];
361 uint inst_size = nj->size(_regalloc);
362 blk_size += inst_size;
363 // Handle machine instruction nodes
364 if( nj->is_Mach() ) {
365 MachNode *mach = nj->as_Mach();
366 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
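        // (Worst case by example: a node with alignment_required() == 8 and
        //  addr_unit() == 1 may need up to 7 pad bytes; this pessimistic pass
        //  reserves that maximum, and the real padding is computed later.)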
367 reloc_size += mach->reloc();
368 const_size += mach->const_size();
369 if( mach->is_MachCall() ) {
370 MachCallNode *mcall = mach->as_MachCall();
371 // This destination address is NOT PC-relative
372
373 mcall->method_set((intptr_t)mcall->entry_point());
374
375 if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) {
376 stub_size += size_java_to_interp();
377 reloc_size += reloc_java_to_interp();
378 }
379 } else if (mach->is_MachSafePoint()) {
380 // If a call and a safepoint are adjacent, account for a possible
381 // nop to disambiguate the two safepoints.
382 if (min_offset_from_last_call == 0) {
383 blk_size += nop_size;
384 }
385 } else if (mach->ideal_Opcode() == Op_Jump) {
386 const_size += b->_num_succs; // Address table size
387 // The size is valid even for 64-bit since it is
388 // multiplied by 2*jintSize at the end of this method.
389 }
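        // (Illustrative: a Jump with 5 successors reserves 5 table entries,
        //  which become 5 * 2*jintSize == 40 bytes after the conversion at
        //  the end of this method.)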
390 }
391 min_offset_from_last_call += inst_size;
392 // Remember end of call offset
393 if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
394 min_offset_from_last_call = 0;
395 }
396 }
397
398 // During short branch replacement, we store the relative (to blk_starts)
399 // end of jump in jmp_end, rather than the absolute end of jump. This
400 // is so that we do not need to recompute sizes of all nodes when we compute
401 // correct blk_starts in our next sizing pass.
402 jmp_end[i] = blk_size;
403 DEBUG_ONLY( jmp_target[i] = 0; )
404
405 // When the next block starts a loop, we may insert pad NOP
406 // instructions. Since we cannot know our future alignment,
407 // assume the worst.
408 if( i<_cfg->_num_blocks-1 ) {
529 if (!_matcher->is_short_branch_offset(jmp_rule[i], offset)) {
530 tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]);
531 }
532 assert(_matcher->is_short_branch_offset(jmp_rule[i], offset), "Displacement too large for short jmp");
533 }
534 }
535 #endif
536
537 // ------------------
538 // Compute size for code buffer
539 code_size = blk_starts[i-1] + jmp_end[i-1];
540
541 // Relocation records
542 reloc_size += 1; // Reloc entry for exception handler
543
544 // Convert reloc_size from a count of relocation records to bytes of
545 // relocation info. A record takes at least 2 bytes, at most about 6 or 8,
546 // plus a tax of up to 25% for a relocation index.
547 // The CodeBuffer will expand the locs array if this estimate is too low.
548 reloc_size *= 10 / sizeof(relocInfo);
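  // (Assuming sizeof(relocInfo) == 2, this budgets 10/2 == 5 bytes per
  //  record, roughly the 2..8 byte record sizes above plus the index tax.)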
549
550 // Adjust const_size to number of bytes
551 const_size *= 2*jintSize; // both float and double take two words per entry
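  // (With jintSize == 4 this reserves 2*4 == 8 bytes per entry: one
  //  double-word slot whether the constant is a float or a double.)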
552
553 }
554
555 //------------------------------FillLocArray-----------------------------------
556 // Create a bit of debug info and append it to the array. The mapping is from
557 // Java local or expression stack to constant, register or stack-slot. For
558 // doubles, insert 2 mappings and return 1 (to tell the caller that the next
559 // entry has been taken care of and the caller should skip it).
560 static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
561 // This should never have accepted Bad before
562 assert(OptoReg::is_valid(regnum), "location must be valid");
563 return (OptoReg::is_reg(regnum))
564 ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
565 : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
566 }
567
568
569 ObjectValue*
570 Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) {
571 for (int i = 0; i < objs->length(); i++) {
572 assert(objs->at(i)->is_object(), "corrupt object cache");
1069 // doesn't work if the fp reg to spill contains a single-precision denorm.
1070 // Instead, we hack around the normal spill mechanism using stfspill's and
1071 // ldffill's in the MachProlog and MachEpilog emit methods. We allocate
1072 // space here for the fp arg regs (f8-f15) we're going to thusly spill.
1073 //
1074 // If we ever implement 16-byte 'registers' == stack slots, we can
1075 // get rid of this hack and have SpillCopy generate stfspill/ldffill
1076 // instead of stfd/stfs/ldfd/ldfs.
1077 _frame_slots += 8*(16/BytesPerInt);
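  // (That is 8 registers * (16 bytes / BytesPerInt) == 8*4 == 32 stack
  //  slots, assuming BytesPerInt == 4.)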
1078 }
1079 #endif
1080 assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" );
1081
1082 // Create an array of unused labels, one for each basic block
1083 Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);
1084
1085 for( i=0; i <= _cfg->_num_blocks; i++ ) {
1086 blk_labels[i].init();
1087 }
1088
1089 // If this machine supports branch offsets of different sizes, then pre-compute
1090 // the lengths of the blocks
1091 if( _matcher->is_short_branch_offset(-1, 0) ) {
1092 Shorten_branches(blk_labels, code_req, locs_req, stub_req, const_req);
1093 labels_not_set = false;
1094 }
1095
1096 // nmethod and CodeBuffer count stubs & constants as part of the method's code.
1097 int exception_handler_req = size_exception_handler();
1098 int deopt_handler_req = size_deopt_handler();
1099 exception_handler_req += MAX_stubs_size; // add marginal slop for handler
1100 deopt_handler_req += MAX_stubs_size; // add marginal slop for handler
1101 stub_req += MAX_stubs_size; // ensure per-stub margin
1102 code_req += MAX_inst_size; // ensure per-instruction margin
1103
1104 if (StressCodeBuffers)
1105 code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion
1106
1107 int total_req =
1108 code_req +
1109 pad_req +
1110 stub_req +
1111 exception_handler_req +
1112 deopt_handler_req + // deopt handler
1113 const_req;
1114
1115 if (has_method_handle_invokes())
1116 total_req += deopt_handler_req; // deopt MH handler
1117
1118 CodeBuffer* cb = code_buffer();
1119 cb->initialize(total_req, locs_req);
1120
1121 // Have we run out of code space?
1122 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
1123 turn_off_compiler(this);
1124 return;
1125 }
1126 // Configure the code buffer.
1127 cb->initialize_consts_size(const_req);
1128 cb->initialize_stubs_size(stub_req);
1129 cb->initialize_oop_recorder(env()->oop_recorder());
1130
1131 // fill in the nop array for bundling computations
1132 MachNode *_nop_list[Bundle::_nop_count];
1133 Bundle::initialize_nops(_nop_list, this);
1163 #endif
1164
1165 NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily
1166
1167 // ------------------
1168 // Now fill in the code buffer
1169 Node *delay_slot = NULL;
1170
1171 for( i=0; i < _cfg->_num_blocks; i++ ) {
1172 Block *b = _cfg->_blocks[i];
1173
1174 Node *head = b->head();
1175
1176 // If this block needs to start aligned (i.e., can be reached other
1177 // than by falling-thru from the previous block), then force the
1178 // start of a new bundle.
1179 if( Pipeline::requires_bundling() && starts_bundle(head) )
1180 cb->flush_bundle(true);
1181
1182 // Define the label at the beginning of the basic block
1183      if (labels_not_set) {
1184        MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
1185      } else {
1186        assert(blk_labels[b->_pre_order].loc_pos() == cb->insts_size(),
1187               "label position does not match code offset");
1188      }
1189
1190 uint last_inst = b->_nodes.size();
1191
1192 // Emit block normally, except for last instruction.
1193 // Emit means "dump code bits into code buffer".
1194 for( uint j = 0; j<last_inst; j++ ) {
1195
1196 // Get the node
1197 Node* n = b->_nodes[j];
1198
1199 // See if delay slots are supported
1200 if (valid_bundle_info(n) &&
1201 node_bundling(n)->used_in_unconditional_delay()) {
1202 assert(delay_slot == NULL, "no use of delay slot node");
1203 assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size");
1204
1205 delay_slot = n;
1206 continue;
1207 }
1208
1685 }
1686
1687 //------------------------------ScheduleAndBundle------------------------------
1688 // Perform instruction scheduling and bundling over the sequence of
1689 // instructions in backwards order.
1690 void Compile::ScheduleAndBundle() {
1691
1692 // Don't optimize this if it isn't a method
1693 if (!_method)
1694 return;
1695
1696 // Don't optimize this if scheduling is disabled
1697 if (!do_scheduling())
1698 return;
1699
1700 NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
1701
1702 // Create a data structure for all the scheduling information
1703 Scheduling scheduling(Thread::current()->resource_area(), *this);
1704
1705 // Walk backwards over each basic block, computing the needed
1706 // alignment and performing the actual scheduling and bundling
1707 scheduling.DoScheduling();
1708 }
1709
1710 //------------------------------ComputeLocalLatenciesForward-------------------
1711 // Compute the latency of all the instructions. This is fairly simple,
1712 // because we already have a legal ordering. Walk over the instructions
1713 // from first to last, and compute the latency of the instruction based
1714 // on the latency of the preceding instruction(s).
1715 void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
1716 #ifndef PRODUCT
1717 if (_cfg->C->trace_opto_output())
1718 tty->print("# -> ComputeLocalLatenciesForward\n");
1719 #endif
1720
1721 // Walk over all the schedulable instructions
1722 for( uint j=_bb_start; j < _bb_end; j++ ) {
1723
1724 // This is a kludge, forcing all latency calculations to start at 1.
1725 // Used to allow latency 0 to force an instruction to the beginning
1726 // of the bb
1727 uint latency = 1;
|
28 extern uint size_java_to_interp();
29 extern uint reloc_java_to_interp();
30 extern uint size_exception_handler();
31 extern uint size_deopt_handler();
32
33 #ifndef PRODUCT
34 #define DEBUG_ARG(x) , x
35 #else
36 #define DEBUG_ARG(x)
37 #endif
38
39 extern int emit_exception_handler(CodeBuffer &cbuf);
40 extern int emit_deopt_handler(CodeBuffer &cbuf);
41
42 //------------------------------Output-----------------------------------------
43 // Convert Nodes to instruction bits and pass off to the VM
44 void Compile::Output() {
45 // RootNode's block must be empty here; the RootNode itself emits no code.
46 assert( _cfg->_broot->_nodes.size() == 0, "" );
47
48 // The number of new nodes (mostly MachNop) is proportional to
49 // the number of java calls and inner loops which are aligned.
50 if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
51 C->inner_loops()*(OptoLoopAlignment-1)),
52 "out of nodes before code generation" ) ) {
53 return;
54 }
55 // Make sure I can find the Start Node
56 Block_Array& bbs = _cfg->_bbs;
57 Block *entry = _cfg->_blocks[1];
58 Block *broot = _cfg->_broot;
59
60 const StartNode *start = entry->_nodes[0]->as_Start();
61
62 // Replace StartNode with prolog
63 MachPrologNode *prolog = new (this) MachPrologNode();
64 entry->_nodes.map( 0, prolog );
65 bbs.map( prolog->_idx, entry );
66 bbs.map( start->_idx, NULL ); // start is no longer in any block
67
295 // block(s) does not have enough instructions.
296 Block *nb = b;
297 while( inst_cnt > 0 &&
298 i < last_block &&
299 !_cfg->_blocks[i+1]->has_loop_alignment() &&
300 !nb->has_successor(b) ) {
301 i++;
302 nb = _cfg->_blocks[i];
303 inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
304 } // while( inst_cnt > 0 && i < last_block )
305
306 b->set_first_inst_size(sum_size);
307 } // if( b->head()->is_Loop() )
308 } // for( i <= last_block )
309 } // if( MaxLoopPad < OptoLoopAlignment-1 )
310 }
311
312 //----------------------Shorten_branches---------------------------------------
313 // The architecture description provides short branch variants for some long
314 // branch instructions. Replace eligible long branches with short branches.
315 void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size) {
316
317 // fill in the nop array for bundling computations
318 MachNode *_nop_list[Bundle::_nop_count];
319 Bundle::initialize_nops(_nop_list, this);
320
321 // ------------------
322 // Compute size of each block, method size, and relocation information size
323 uint *jmp_end = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks);
324 uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
325 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
326 DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
327 blk_starts[0] = 0;
328
329 // Initialize the sizes to 0
330 code_size = 0; // Size in bytes of generated code
331 stub_size = 0; // Size in bytes of all stub entries
332 // Size in bytes of all relocation entries, including those in local stubs.
333 // Start with 2 bytes of reloc info for the unvalidated entry point
334 reloc_size = 1; // Number of relocation entries
335
336 // Make three passes. The first computes pessimistic blk_starts,
337 // relative jmp_end and reloc_size information. The second performs
338 // short branch substitution using the pessimistic sizing. The
339 // third inserts nops where needed.
340
341 Node *nj; // tmp
342
343 // Step one, perform a pessimistic sizing pass.
344 uint i;
345 uint min_offset_from_last_call = 1; // init to a positive value
346 uint nop_size = (new (this) MachNopNode())->size(_regalloc);
347 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
348 Block *b = _cfg->_blocks[i];
349
350 // Sum all instruction sizes to compute block size
351 uint last_inst = b->_nodes.size();
352 uint blk_size = 0;
353 for( uint j = 0; j<last_inst; j++ ) {
354 nj = b->_nodes[j];
355 uint inst_size = nj->size(_regalloc);
356 blk_size += inst_size;
357 // Handle machine instruction nodes
358 if( nj->is_Mach() ) {
359 MachNode *mach = nj->as_Mach();
360 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
361 reloc_size += mach->reloc();
362 if( mach->is_MachCall() ) {
363 MachCallNode *mcall = mach->as_MachCall();
364 // This destination address is NOT PC-relative
365
366 mcall->method_set((intptr_t)mcall->entry_point());
367
368 if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) {
369 stub_size += size_java_to_interp();
370 reloc_size += reloc_java_to_interp();
371 }
372 } else if (mach->is_MachSafePoint()) {
373 // If a call and a safepoint are adjacent, account for a possible
374 // nop to disambiguate the two safepoints.
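          // (Safepoint debug info is recorded per PC offset, so the two
          //  safepoints must not share one; the nop keeps them distinct.)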
375 if (min_offset_from_last_call == 0) {
376 blk_size += nop_size;
377 }
378 }
379 }
380 min_offset_from_last_call += inst_size;
381 // Remember end of call offset
382 if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
383 min_offset_from_last_call = 0;
384 }
385 }
386
387 // During short branch replacement, we store the relative (to blk_starts)
388 // end of jump in jmp_end, rather than the absolute end of jump. This
389 // is so that we do not need to recompute sizes of all nodes when we compute
390 // correct blk_starts in our next sizing pass.
391 jmp_end[i] = blk_size;
392 DEBUG_ONLY( jmp_target[i] = 0; )
393
394 // When the next block starts a loop, we may insert pad NOP
395 // instructions. Since we cannot know our future alignment,
396 // assume the worst.
397 if( i<_cfg->_num_blocks-1 ) {
518 if (!_matcher->is_short_branch_offset(jmp_rule[i], offset)) {
519 tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]);
520 }
521 assert(_matcher->is_short_branch_offset(jmp_rule[i], offset), "Displacement too large for short jmp");
522 }
523 }
524 #endif
525
526 // ------------------
527 // Compute size for code buffer
528 code_size = blk_starts[i-1] + jmp_end[i-1];
529
530 // Relocation records
531 reloc_size += 1; // Reloc entry for exception handler
532
533 // Convert reloc_size from a count of relocation records to bytes of
534 // relocation info. A record takes at least 2 bytes, at most about 6 or 8,
535 // plus a tax of up to 25% for a relocation index.
536 // The CodeBuffer will expand the locs array if this estimate is too low.
537 reloc_size *= 10 / sizeof(relocInfo);
538 }
539
540 //------------------------------FillLocArray-----------------------------------
541 // Create a bit of debug info and append it to the array. The mapping is from
542 // Java local or expression stack to constant, register or stack-slot. For
543 // doubles, insert 2 mappings and return 1 (to tell the caller that the next
544 // entry has been taken care of and the caller should skip it).
545 static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
546 // This should never have accepted Bad before
547 assert(OptoReg::is_valid(regnum), "location must be valid");
548 return (OptoReg::is_reg(regnum))
549 ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
550 : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
551 }
552
553
554 ObjectValue*
555 Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) {
556 for (int i = 0; i < objs->length(); i++) {
557 assert(objs->at(i)->is_object(), "corrupt object cache");
1054 // doesn't work if the fp reg to spill contains a single-precision denorm.
1055 // Instead, we hack around the normal spill mechanism using stfspill's and
1056 // ldffill's in the MachProlog and MachEpilog emit methods. We allocate
1057 // space here for the fp arg regs (f8-f15) we're going to thusly spill.
1058 //
1059 // If we ever implement 16-byte 'registers' == stack slots, we can
1060 // get rid of this hack and have SpillCopy generate stfspill/ldffill
1061 // instead of stfd/stfs/ldfd/ldfs.
1062 _frame_slots += 8*(16/BytesPerInt);
1063 }
1064 #endif
1065 assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" );
1066
1067 // Create an array of unused labels, one for each basic block
1068 Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);
1069
1070 for( i=0; i <= _cfg->_num_blocks; i++ ) {
1071 blk_labels[i].init();
1072 }
1073
1074 if (has_mach_constant_base_node()) {
1075 // Fill the constant table.
1076 // Note: This must happen before Shorten_branches.
1077 for (i = 0; i < _cfg->_num_blocks; i++) {
1078 Block* b = _cfg->_blocks[i];
1079
1080 for (uint j = 0; j < b->_nodes.size(); j++) {
1081 Node* n = b->_nodes[j];
1082
1083 if (n->is_Mach()) {
1084 MachNode *mach = n->as_Mach();
1085
1086 // If the MachNode is a MachConstantNode, evaluate its
1087 // constant value section.
1088 if (mach->is_MachConstant()) {
1089 MachConstantNode* machcon = mach->as_MachConstant();
1090 machcon->eval_constant();
1091 }
1092 }
1093 }
1094 }
1095
1096 // Calculate the size of the constant table (including the padding
1097 // to the next section).
1098 const_req = mach_constant_base_node()->calculate_constant_table_size();
1099 }
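  // (Presumably the required ordering exists because a MachConstantNode's
  //  emitted size depends on its offset into the filled table, and
  //  Shorten_branches below needs final instruction sizes.)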
1100
1101 // Initialize the space for the BufferBlob used to find and verify
1102 // instruction size in MachNode::emit_size()
1103 init_scratch_buffer_blob(const_req);
1104 if (failing()) return; // Out of memory
1105
1106 // If this machine supports branch offsets of different sizes, then pre-compute
1107 // the lengths of the blocks
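  // (The is_short_branch_offset(-1, 0) call below acts as a feature probe:
  //  rule -1 asks whether the platform defines short-branch variants at all.)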
1108 if( _matcher->is_short_branch_offset(-1, 0) ) {
1109 Shorten_branches(blk_labels, code_req, locs_req, stub_req);
1110 labels_not_set = false;
1111 }
1112
1113 // nmethod and CodeBuffer count stubs & constants as part of the method's code.
1114 int exception_handler_req = size_exception_handler();
1115 int deopt_handler_req = size_deopt_handler();
1116 exception_handler_req += MAX_stubs_size; // add marginal slop for handler
1117 deopt_handler_req += MAX_stubs_size; // add marginal slop for handler
1118 stub_req += MAX_stubs_size; // ensure per-stub margin
1119 code_req += MAX_inst_size; // ensure per-instruction margin
1120
1121 if (StressCodeBuffers)
1122 code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion
1123
1124 int total_req =
1125 const_req +
1126 code_req +
1127 pad_req +
1128 stub_req +
1129 exception_handler_req +
1130 deopt_handler_req; // deopt handler
1131
1132 if (has_method_handle_invokes())
1133 total_req += deopt_handler_req; // deopt MH handler
1134
1135 CodeBuffer* cb = code_buffer();
1136 cb->initialize(total_req, locs_req);
1137
1138 // Have we run out of code space?
1139 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
1140 turn_off_compiler(this);
1141 return;
1142 }
1143 // Configure the code buffer.
1144 cb->initialize_consts_size(const_req);
1145 cb->initialize_stubs_size(stub_req);
1146 cb->initialize_oop_recorder(env()->oop_recorder());
1147
1148 // fill in the nop array for bundling computations
1149 MachNode *_nop_list[Bundle::_nop_count];
1150 Bundle::initialize_nops(_nop_list, this);
1180 #endif
1181
1182 NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily
1183
1184 // ------------------
1185 // Now fill in the code buffer
1186 Node *delay_slot = NULL;
1187
1188 for( i=0; i < _cfg->_num_blocks; i++ ) {
1189 Block *b = _cfg->_blocks[i];
1190
1191 Node *head = b->head();
1192
1193 // If this block needs to start aligned (i.e., can be reached other
1194 // than by falling-thru from the previous block), then force the
1195 // start of a new bundle.
1196 if( Pipeline::requires_bundling() && starts_bundle(head) )
1197 cb->flush_bundle(true);
1198
1199 // Define the label at the beginning of the basic block
1200 if (labels_not_set) {
1201 MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
1202 } else {
1203 assert(blk_labels[b->_pre_order].loc_pos() == cb->insts_size(),
1204 err_msg("label position does not match code offset: %d != %d",
1205 blk_labels[b->_pre_order].loc_pos(), cb->insts_size()));
1206 }
1207
1208 uint last_inst = b->_nodes.size();
1209
1210 // Emit block normally, except for last instruction.
1211 // Emit means "dump code bits into code buffer".
1212 for( uint j = 0; j<last_inst; j++ ) {
1213
1214 // Get the node
1215 Node* n = b->_nodes[j];
1216
1217 // See if delay slots are supported
1218 if (valid_bundle_info(n) &&
1219 node_bundling(n)->used_in_unconditional_delay()) {
1220 assert(delay_slot == NULL, "no use of delay slot node");
1221 assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size");
1222
1223 delay_slot = n;
1224 continue;
1225 }
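        // (The delay-slot node is only remembered here; it is emitted
        //  immediately after its branch, filling the architectural delay
        //  slot.)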
1226
1703 }
1704
1705 //------------------------------ScheduleAndBundle------------------------------
1706 // Perform instruction scheduling and bundling over the sequence of
1707 // instructions in backwards order.
1708 void Compile::ScheduleAndBundle() {
1709
1710 // Don't optimize this if it isn't a method
1711 if (!_method)
1712 return;
1713
1714 // Don't optimize this if scheduling is disabled
1715 if (!do_scheduling())
1716 return;
1717
1718 NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
1719
1720 // Create a data structure for all the scheduling information
1721 Scheduling scheduling(Thread::current()->resource_area(), *this);
1722
1723 // Initialize the space for the BufferBlob used to find and verify
1724 // instruction size in MachNode::emit_size()
1725 init_scratch_buffer_blob(MAX_const_size);
1726 if (failing()) return; // Out of memory
1727
1728 // Walk backwards over each basic block, computing the needed
1729 // alignment and performing the actual scheduling and bundling
1730 scheduling.DoScheduling();
1731
1732 // Clear the BufferBlob used for scheduling.
1733 clear_scratch_buffer_blob();
1734 }
1735
1736 //------------------------------ComputeLocalLatenciesForward-------------------
1737 // Compute the latency of all the instructions. This is fairly simple,
1738 // because we already have a legal ordering. Walk over the instructions
1739 // from first to last, and compute the latency of the instruction based
1740 // on the latency of the preceding instruction(s).
1741 void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
1742 #ifndef PRODUCT
1743 if (_cfg->C->trace_opto_output())
1744 tty->print("# -> ComputeLocalLatenciesForward\n");
1745 #endif
1746
1747 // Walk over all the schedulable instructions
1748 for( uint j=_bb_start; j < _bb_end; j++ ) {
1749
1750 // This is a kludge, forcing all latency calculations to start at 1.
1751 // Used to allow latency 0 to force an instruction to the beginning
1752 // of the bb
1753 uint latency = 1;
|