src/share/vm/opto/output.cpp
rev 1838 : 6961690: load oops from constant table on SPARC
Summary: oops should be loaded from the constant table of an nmethod instead of being materialized with a long code sequence.
Reviewed-by:
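
For orientation, a rough sketch of the idea behind this change (the class and member names below are invented for illustration; they are not the MachConstantBaseNode/MachConstantNode machinery the patch actually adds): constants such as oops are appended to a per-method table, each use site records only a small byte offset, and at run time the value is fetched with a single load off a base register instead of being rebuilt inline with a long materialization sequence, which is roughly what the summary above refers to on SPARC.

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Hypothetical model of a per-method constant table (illustration only).
  class ConstTable {
    std::vector<uintptr_t> _entries;          // the recorded constants (e.g. oops)
  public:
    // Record a constant and return the byte offset a use site would encode.
    size_t add(uintptr_t value) {
      _entries.push_back(value);
      return (_entries.size() - 1) * sizeof(uintptr_t);
    }
    // Bytes to reserve for the table, padded to a power-of-two 'alignment'
    // (cf. calculate_constant_table_size() in the new version below).
    size_t size_in_bytes(size_t alignment) const {
      size_t raw = _entries.size() * sizeof(uintptr_t);
      return (raw + alignment - 1) & ~(alignment - 1);
    }
  };

The patch itself wires this through eval_constant() on each MachConstantNode and reserves const_req bytes in the CodeBuffer's consts section, as seen in the new version of the file further down.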


  28 extern uint size_java_to_interp();
  29 extern uint reloc_java_to_interp();
  30 extern uint size_exception_handler();
  31 extern uint size_deopt_handler();
  32 
  33 #ifndef PRODUCT
  34 #define DEBUG_ARG(x) , x
  35 #else
  36 #define DEBUG_ARG(x)
  37 #endif
  38 
  39 extern int emit_exception_handler(CodeBuffer &cbuf);
  40 extern int emit_deopt_handler(CodeBuffer &cbuf);
  41 
  42 //------------------------------Output-----------------------------------------
  43 // Convert Nodes to instruction bits and pass off to the VM
  44 void Compile::Output() {
  45   // RootNode goes
  46   assert( _cfg->_broot->_nodes.size() == 0, "" );
  47 
  48   // Initialize the space for the BufferBlob used to find and verify
  49   // instruction size in MachNode::emit_size()
  50   init_scratch_buffer_blob();
  51   if (failing())  return; // Out of memory
  52 
  53   // The number of new nodes (mostly MachNop) is proportional to
  54   // the number of java calls and inner loops which are aligned.
  55   if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
  56                             C->inner_loops()*(OptoLoopAlignment-1)),
  57                            "out of nodes before code generation" ) ) {
  58     return;
  59   }
  60   // Make sure I can find the Start Node
  61   Block_Array& bbs = _cfg->_bbs;
  62   Block *entry = _cfg->_blocks[1];
  63   Block *broot = _cfg->_broot;
  64 
  65   const StartNode *start = entry->_nodes[0]->as_Start();
  66 
  67   // Replace StartNode with prolog
  68   MachPrologNode *prolog = new (this) MachPrologNode();
  69   entry->_nodes.map( 0, prolog );
  70   bbs.map( prolog->_idx, entry );
  71   bbs.map( start->_idx, NULL ); // start is no longer in any block
  72 


 300         // block(s) does not have enough instructions.
 301         Block *nb = b;
 302         while( inst_cnt > 0 &&
 303                i < last_block &&
 304                !_cfg->_blocks[i+1]->has_loop_alignment() &&
 305                !nb->has_successor(b) ) {
 306           i++;
 307           nb = _cfg->_blocks[i];
 308           inst_cnt  = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
 309         } // while( inst_cnt > 0 && i < last_block  )
 310 
 311         b->set_first_inst_size(sum_size);
  312       } // if( b->head()->is_Loop() )
 313     } // for( i <= last_block )
 314   } // if( MaxLoopPad < OptoLoopAlignment-1 )
 315 }
 316 
 317 //----------------------Shorten_branches---------------------------------------
 318 // The architecture description provides short branch variants for some long
 319 // branch instructions. Replace eligible long branches with short branches.
 320 void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size, int& const_size) {
 321 
 322   // fill in the nop array for bundling computations
 323   MachNode *_nop_list[Bundle::_nop_count];
 324   Bundle::initialize_nops(_nop_list, this);
 325 
 326   // ------------------
 327   // Compute size of each block, method size, and relocation information size
 328   uint *jmp_end    = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks);
 329   uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
 330   DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
 331   DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
 332   blk_starts[0]    = 0;
 333 
 334   // Initialize the sizes to 0
 335   code_size  = 0;          // Size in bytes of generated code
 336   stub_size  = 0;          // Size in bytes of all stub entries
 337   // Size in bytes of all relocation entries, including those in local stubs.
 338   // Start with 2-bytes of reloc info for the unvalidated entry point
 339   reloc_size = 1;          // Number of relocation entries
 340   const_size = 0;          // size of fp constants in words
 341 
 342   // Make three passes.  The first computes pessimistic blk_starts,
 343   // relative jmp_end, reloc_size and const_size information.
 344   // The second performs short branch substitution using the pessimistic
 345   // sizing. The third inserts nops where needed.
 346 
 347   Node *nj; // tmp
 348 
 349   // Step one, perform a pessimistic sizing pass.
 350   uint i;
 351   uint min_offset_from_last_call = 1;  // init to a positive value
 352   uint nop_size = (new (this) MachNopNode())->size(_regalloc);
 353   for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
 354     Block *b = _cfg->_blocks[i];
 355 
 356     // Sum all instruction sizes to compute block size
 357     uint last_inst = b->_nodes.size();
 358     uint blk_size = 0;
 359     for( uint j = 0; j<last_inst; j++ ) {
 360       nj = b->_nodes[j];
 361       uint inst_size = nj->size(_regalloc);
 362       blk_size += inst_size;
 363       // Handle machine instruction nodes
 364       if( nj->is_Mach() ) {
 365         MachNode *mach = nj->as_Mach();
 366         blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
 367         reloc_size += mach->reloc();
 368         const_size += mach->const_size();
 369         if( mach->is_MachCall() ) {
 370           MachCallNode *mcall = mach->as_MachCall();
 371           // This destination address is NOT PC-relative
 372 
 373           mcall->method_set((intptr_t)mcall->entry_point());
 374 
 375           if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) {
 376             stub_size  += size_java_to_interp();
 377             reloc_size += reloc_java_to_interp();
 378           }
 379         } else if (mach->is_MachSafePoint()) {
 380           // If call/safepoint are adjacent, account for possible
 381           // nop to disambiguate the two safepoints.
 382           if (min_offset_from_last_call == 0) {
 383             blk_size += nop_size;
 384           }
 385         } else if (mach->ideal_Opcode() == Op_Jump) {
 386           const_size += b->_num_succs; // Address table size
 387           // The size is valid even for 64 bit since it is
  388           // multiplied by 2*jintSize at the end of this method.
 389         }
 390       }
 391       min_offset_from_last_call += inst_size;
 392       // Remember end of call offset
 393       if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
 394         min_offset_from_last_call = 0;
 395       }
 396     }
 397 
 398     // During short branch replacement, we store the relative (to blk_starts)
 399     // end of jump in jmp_end, rather than the absolute end of jump.  This
 400     // is so that we do not need to recompute sizes of all nodes when we compute
 401     // correct blk_starts in our next sizing pass.
 402     jmp_end[i] = blk_size;
 403     DEBUG_ONLY( jmp_target[i] = 0; )
 404 
 405     // When the next block starts a loop, we may insert pad NOP
 406     // instructions.  Since we cannot know our future alignment,
 407     // assume the worst.
 408     if( i<_cfg->_num_blocks-1 ) {


 529       if (!_matcher->is_short_branch_offset(jmp_rule[i], offset)) {
 530         tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]);
 531       }
 532       assert(_matcher->is_short_branch_offset(jmp_rule[i], offset), "Displacement too large for short jmp");
 533     }
 534   }
 535 #endif
 536 
 537   // ------------------
 538   // Compute size for code buffer
 539   code_size   = blk_starts[i-1] + jmp_end[i-1];
 540 
 541   // Relocation records
  542   reloc_size += 1;              // Reloc entry for exception handler
 543 
  544   // Adjust reloc_size to the number of records of relocation info
 545   // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
 546   // a relocation index.
 547   // The CodeBuffer will expand the locs array if this estimate is too low.
 548   reloc_size   *= 10 / sizeof(relocInfo);
 549 
 550   // Adjust const_size to number of bytes
 551   const_size   *= 2*jintSize; // both float and double take two words per entry
 552 
 553 }
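
A worked instance of the two adjustments above, with made-up counts and assuming relocInfo is a 2-byte element: the factor 10 / sizeof(relocInfo) evaluates to 5, i.e. each counted record is budgeted at the commented worst case of roughly 10 bytes expressed in relocInfo-sized units, so an estimate of 40 records requests 40 * 5 = 200 units of locs space (and the CodeBuffer grows the array if that turns out to be low). const_size is scaled by 2*jintSize = 8 bytes per entry, so 6 counted float/double or jump-table entries reserve 48 bytes in the constants section.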
 554 
 555 //------------------------------FillLocArray-----------------------------------
 556 // Create a bit of debug info and append it to the array.  The mapping is from
 557 // Java local or expression stack to constant, register or stack-slot.  For
 558 // doubles, insert 2 mappings and return 1 (to tell the caller that the next
 559 // entry has been taken care of and caller should skip it).
 560 static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
 561   // This should never have accepted Bad before
 562   assert(OptoReg::is_valid(regnum), "location must be valid");
 563   return (OptoReg::is_reg(regnum))
 564     ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
 565     : new LocationValue(Location::new_stk_loc(l_type,  ra->reg2offset(regnum)));
 566 }
 567 
 568 
 569 ObjectValue*
 570 Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) {
 571   for (int i = 0; i < objs->length(); i++) {
 572     assert(objs->at(i)->is_object(), "corrupt object cache");


1069     // doesn't work if the fp reg to spill contains a single-precision denorm.
1070     // Instead, we hack around the normal spill mechanism using stfspill's and
1071     // ldffill's in the MachProlog and MachEpilog emit methods.  We allocate
1072     // space here for the fp arg regs (f8-f15) we're going to thusly spill.
1073     //
1074     // If we ever implement 16-byte 'registers' == stack slots, we can
1075     // get rid of this hack and have SpillCopy generate stfspill/ldffill
1076     // instead of stfd/stfs/ldfd/ldfs.
1077     _frame_slots += 8*(16/BytesPerInt);
1078   }
1079 #endif
1080   assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" );
1081 
1082   // Create an array of unused labels, one for each basic block
1083   Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);
1084 
1085   for( i=0; i <= _cfg->_num_blocks; i++ ) {
1086     blk_labels[i].init();
1087   }
1088 

1089   // If this machine supports different size branch offsets, then pre-compute
1090   // the length of the blocks
1091   if( _matcher->is_short_branch_offset(-1, 0) ) {
1092     Shorten_branches(blk_labels, code_req, locs_req, stub_req, const_req);
1093     labels_not_set = false;
1094   }
1095 
1096   // nmethod and CodeBuffer count stubs & constants as part of method's code.
1097   int exception_handler_req = size_exception_handler();
1098   int deopt_handler_req = size_deopt_handler();
1099   exception_handler_req += MAX_stubs_size; // add marginal slop for handler
1100   deopt_handler_req += MAX_stubs_size; // add marginal slop for handler
1101   stub_req += MAX_stubs_size;   // ensure per-stub margin
1102   code_req += MAX_inst_size;    // ensure per-instruction margin
1103 
1104   if (StressCodeBuffers)
1105     code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10;  // force expansion
1106 
1107   int total_req =

1108     code_req +
1109     pad_req +
1110     stub_req +
1111     exception_handler_req +
1112     deopt_handler_req +              // deopt handler
1113     const_req;
1114 
1115   if (has_method_handle_invokes())
1116     total_req += deopt_handler_req;  // deopt MH handler
1117 
1118   CodeBuffer* cb = code_buffer();
1119   cb->initialize(total_req, locs_req);
1120 
1121   // Have we run out of code space?
1122   if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
1123     turn_off_compiler(this);
1124     return;
1125   }
1126   // Configure the code buffer.
1127   cb->initialize_consts_size(const_req);
1128   cb->initialize_stubs_size(stub_req);
1129   cb->initialize_oop_recorder(env()->oop_recorder());
1130 
1131   // fill in the nop array for bundling computations
1132   MachNode *_nop_list[Bundle::_nop_count];
1133   Bundle::initialize_nops(_nop_list, this);


1163 #endif
1164 
1165   NonSafepointEmitter non_safepoints(this);  // emit non-safepoints lazily
1166 
1167   // ------------------
1168   // Now fill in the code buffer
1169   Node *delay_slot = NULL;
1170 
1171   for( i=0; i < _cfg->_num_blocks; i++ ) {
1172     Block *b = _cfg->_blocks[i];
1173 
1174     Node *head = b->head();
1175 
 1176     // If this block needs to start aligned (i.e., can be reached other
1177     // than by falling-thru from the previous block), then force the
1178     // start of a new bundle.
1179     if( Pipeline::requires_bundling() && starts_bundle(head) )
1180       cb->flush_bundle(true);
1181 
1182     // Define the label at the beginning of the basic block
1183     if( labels_not_set )
1184       MacroAssembler(cb).bind( blk_labels[b->_pre_order] );
1185 
1186     else
1187       assert( blk_labels[b->_pre_order].loc_pos() == cb->insts_size(),
1188               "label position does not match code offset" );

1189 
1190     uint last_inst = b->_nodes.size();
1191 
1192     // Emit block normally, except for last instruction.
1193     // Emit means "dump code bits into code buffer".
1194     for( uint j = 0; j<last_inst; j++ ) {
1195 
1196       // Get the node
1197       Node* n = b->_nodes[j];
1198 
1199       // See if delay slots are supported
1200       if (valid_bundle_info(n) &&
1201           node_bundling(n)->used_in_unconditional_delay()) {
1202         assert(delay_slot == NULL, "no use of delay slot node");
1203         assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size");
1204 
1205         delay_slot = n;
1206         continue;
1207       }
1208 


1685 }
1686 
1687 //------------------------------ScheduleAndBundle------------------------------
1688 // Perform instruction scheduling and bundling over the sequence of
1689 // instructions in backwards order.
1690 void Compile::ScheduleAndBundle() {
1691 
1692   // Don't optimize this if it isn't a method
1693   if (!_method)
1694     return;
1695 
1696   // Don't optimize this if scheduling is disabled
1697   if (!do_scheduling())
1698     return;
1699 
1700   NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
1701 
1702   // Create a data structure for all the scheduling information
1703   Scheduling scheduling(Thread::current()->resource_area(), *this);
1704 





1705   // Walk backwards over each basic block, computing the needed alignment
1706   // Walk over all the basic blocks
1707   scheduling.DoScheduling();



1708 }
1709 
1710 //------------------------------ComputeLocalLatenciesForward-------------------
1711 // Compute the latency of all the instructions.  This is fairly simple,
1712 // because we already have a legal ordering.  Walk over the instructions
1713 // from first to last, and compute the latency of the instruction based
1714 // on the latency of the preceding instruction(s).
1715 void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
1716 #ifndef PRODUCT
1717   if (_cfg->C->trace_opto_output())
1718     tty->print("# -> ComputeLocalLatenciesForward\n");
1719 #endif
1720 
1721   // Walk over all the schedulable instructions
1722   for( uint j=_bb_start; j < _bb_end; j++ ) {
1723 
1724     // This is a kludge, forcing all latency calculations to start at 1.
1725     // Used to allow latency 0 to force an instruction to the beginning
1726     // of the bb
1727     uint latency = 1;




  28 extern uint size_java_to_interp();
  29 extern uint reloc_java_to_interp();
  30 extern uint size_exception_handler();
  31 extern uint size_deopt_handler();
  32 
  33 #ifndef PRODUCT
  34 #define DEBUG_ARG(x) , x
  35 #else
  36 #define DEBUG_ARG(x)
  37 #endif
  38 
  39 extern int emit_exception_handler(CodeBuffer &cbuf);
  40 extern int emit_deopt_handler(CodeBuffer &cbuf);
  41 
  42 //------------------------------Output-----------------------------------------
  43 // Convert Nodes to instruction bits and pass off to the VM
  44 void Compile::Output() {
  45   // RootNode goes
  46   assert( _cfg->_broot->_nodes.size() == 0, "" );
  47 





  48   // The number of new nodes (mostly MachNop) is proportional to
  49   // the number of java calls and inner loops which are aligned.
  50   if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
  51                             C->inner_loops()*(OptoLoopAlignment-1)),
  52                            "out of nodes before code generation" ) ) {
  53     return;
  54   }
  55   // Make sure I can find the Start Node
  56   Block_Array& bbs = _cfg->_bbs;
  57   Block *entry = _cfg->_blocks[1];
  58   Block *broot = _cfg->_broot;
  59 
  60   const StartNode *start = entry->_nodes[0]->as_Start();
  61 
  62   // Replace StartNode with prolog
  63   MachPrologNode *prolog = new (this) MachPrologNode();
  64   entry->_nodes.map( 0, prolog );
  65   bbs.map( prolog->_idx, entry );
  66   bbs.map( start->_idx, NULL ); // start is no longer in any block
  67 


 295         // block(s) does not have enough instructions.
 296         Block *nb = b;
 297         while( inst_cnt > 0 &&
 298                i < last_block &&
 299                !_cfg->_blocks[i+1]->has_loop_alignment() &&
 300                !nb->has_successor(b) ) {
 301           i++;
 302           nb = _cfg->_blocks[i];
 303           inst_cnt  = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
 304         } // while( inst_cnt > 0 && i < last_block  )
 305 
 306         b->set_first_inst_size(sum_size);
  307       } // if( b->head()->is_Loop() )
 308     } // for( i <= last_block )
 309   } // if( MaxLoopPad < OptoLoopAlignment-1 )
 310 }
 311 
 312 //----------------------Shorten_branches---------------------------------------
 313 // The architecture description provides short branch variants for some long
 314 // branch instructions. Replace eligible long branches with short branches.
 315 void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size) {
 316 
 317   // fill in the nop array for bundling computations
 318   MachNode *_nop_list[Bundle::_nop_count];
 319   Bundle::initialize_nops(_nop_list, this);
 320 
 321   // ------------------
 322   // Compute size of each block, method size, and relocation information size
 323   uint *jmp_end    = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks);
 324   uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
 325   DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
 326   DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
 327   blk_starts[0]    = 0;
 328 
 329   // Initialize the sizes to 0
 330   code_size  = 0;          // Size in bytes of generated code
 331   stub_size  = 0;          // Size in bytes of all stub entries
 332   // Size in bytes of all relocation entries, including those in local stubs.
 333   // Start with 2-bytes of reloc info for the unvalidated entry point
 334   reloc_size = 1;          // Number of relocation entries

 335 
 336   // Make three passes.  The first computes pessimistic blk_starts,
 337   // relative jmp_end and reloc_size information.  The second performs
 338   // short branch substitution using the pessimistic sizing.  The
 339   // third inserts nops where needed.
 340 
 341   Node *nj; // tmp
 342 
 343   // Step one, perform a pessimistic sizing pass.
 344   uint i;
 345   uint min_offset_from_last_call = 1;  // init to a positive value
 346   uint nop_size = (new (this) MachNopNode())->size(_regalloc);
 347   for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
 348     Block *b = _cfg->_blocks[i];
 349 
 350     // Sum all instruction sizes to compute block size
 351     uint last_inst = b->_nodes.size();
 352     uint blk_size = 0;
 353     for( uint j = 0; j<last_inst; j++ ) {
 354       nj = b->_nodes[j];
 355       uint inst_size = nj->size(_regalloc);
 356       blk_size += inst_size;
 357       // Handle machine instruction nodes
 358       if( nj->is_Mach() ) {
 359         MachNode *mach = nj->as_Mach();
 360         blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
 361         reloc_size += mach->reloc();

 362         if( mach->is_MachCall() ) {
 363           MachCallNode *mcall = mach->as_MachCall();
 364           // This destination address is NOT PC-relative
 365 
 366           mcall->method_set((intptr_t)mcall->entry_point());
 367 
 368           if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) {
 369             stub_size  += size_java_to_interp();
 370             reloc_size += reloc_java_to_interp();
 371           }
 372         } else if (mach->is_MachSafePoint()) {
 373           // If call/safepoint are adjacent, account for possible
 374           // nop to disambiguate the two safepoints.
 375           if (min_offset_from_last_call == 0) {
 376             blk_size += nop_size;
 377           }




 378         }
 379       }
 380       min_offset_from_last_call += inst_size;
 381       // Remember end of call offset
 382       if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
 383         min_offset_from_last_call = 0;
 384       }
 385     }
 386 
 387     // During short branch replacement, we store the relative (to blk_starts)
 388     // end of jump in jmp_end, rather than the absolute end of jump.  This
 389     // is so that we do not need to recompute sizes of all nodes when we compute
 390     // correct blk_starts in our next sizing pass.
 391     jmp_end[i] = blk_size;
 392     DEBUG_ONLY( jmp_target[i] = 0; )
 393 
 394     // When the next block starts a loop, we may insert pad NOP
 395     // instructions.  Since we cannot know our future alignment,
 396     // assume the worst.
 397     if( i<_cfg->_num_blocks-1 ) {


 518       if (!_matcher->is_short_branch_offset(jmp_rule[i], offset)) {
 519         tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]);
 520       }
 521       assert(_matcher->is_short_branch_offset(jmp_rule[i], offset), "Displacement too large for short jmp");
 522     }
 523   }
 524 #endif
 525 
 526   // ------------------
 527   // Compute size for code buffer
 528   code_size   = blk_starts[i-1] + jmp_end[i-1];
 529 
 530   // Relocation records
  531   reloc_size += 1;              // Reloc entry for exception handler
 532 
  533   // Adjust reloc_size to the number of records of relocation info
 534   // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
 535   // a relocation index.
 536   // The CodeBuffer will expand the locs array if this estimate is too low.
 537   reloc_size   *= 10 / sizeof(relocInfo);




 538 }
 539 
 540 //------------------------------FillLocArray-----------------------------------
 541 // Create a bit of debug info and append it to the array.  The mapping is from
 542 // Java local or expression stack to constant, register or stack-slot.  For
 543 // doubles, insert 2 mappings and return 1 (to tell the caller that the next
 544 // entry has been taken care of and caller should skip it).
 545 static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
 546   // This should never have accepted Bad before
 547   assert(OptoReg::is_valid(regnum), "location must be valid");
 548   return (OptoReg::is_reg(regnum))
 549     ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
 550     : new LocationValue(Location::new_stk_loc(l_type,  ra->reg2offset(regnum)));
 551 }
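
The "insert 2 mappings and return 1" convention documented above is easy to misread, so here is a minimal sketch of the intended caller shape; fill_one, fill_all and is_two_slot are invented names for illustration, not FillLocArray's real signature:

  #include <vector>

  // Hypothetical stand-in for the fill helper: returns 1 when the value at
  // 'idx' occupies two Java slots (double/long) and the second slot was
  // already described, 0 otherwise.
  static int fill_one(const std::vector<bool>& is_two_slot, int idx) {
    // ... a real helper would append one LocationValue here, or two for a
    // two-slot value ...
    return is_two_slot[idx] ? 1 : 0;
  }

  static void fill_all(const std::vector<bool>& is_two_slot) {
    for (int idx = 0; idx < (int)is_two_slot.size(); idx++) {
      idx += fill_one(is_two_slot, idx);  // skip the slot the helper consumed
    }
  }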
 552 
 553 
 554 ObjectValue*
 555 Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) {
 556   for (int i = 0; i < objs->length(); i++) {
 557     assert(objs->at(i)->is_object(), "corrupt object cache");


1054     // doesn't work if the fp reg to spill contains a single-precision denorm.
1055     // Instead, we hack around the normal spill mechanism using stfspill's and
1056     // ldffill's in the MachProlog and MachEpilog emit methods.  We allocate
1057     // space here for the fp arg regs (f8-f15) we're going to thusly spill.
1058     //
1059     // If we ever implement 16-byte 'registers' == stack slots, we can
1060     // get rid of this hack and have SpillCopy generate stfspill/ldffill
1061     // instead of stfd/stfs/ldfd/ldfs.
1062     _frame_slots += 8*(16/BytesPerInt);
1063   }
1064 #endif
1065   assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" );
1066 
1067   // Create an array of unused labels, one for each basic block
1068   Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);
1069 
1070   for( i=0; i <= _cfg->_num_blocks; i++ ) {
1071     blk_labels[i].init();
1072   }
1073 
1074   if (has_mach_constant_base_node()) {
1075     // Fill the constant table.
1076     // Note:  This must happen before Shorten_branches.
1077     for (i = 0; i < _cfg->_num_blocks; i++) {
1078       Block* b = _cfg->_blocks[i];
1079 
1080       for (uint j = 0; j < b->_nodes.size(); j++) {
1081         Node* n = b->_nodes[j];
1082 
1083         if (n->is_Mach()) {
1084           MachNode *mach = n->as_Mach();
1085 
1086           // If the MachNode is a MachConstantNode evaluate the
1087           // constant value section.
1088           if (mach->is_MachConstant()) {
1089             MachConstantNode* machcon = mach->as_MachConstant();
1090             machcon->eval_constant();
1091           }
1092         }
1093       }
1094     }
1095 
1096     // Calculate the size of the constant table (including the padding
1097     // to the next section).
1098     const_req = mach_constant_base_node()->calculate_constant_table_size();
1099   }
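      // (Presumably this ordering is needed so that each MachConstantNode's
      //  table offset and the resulting const_req are known before instruction
      //  sizes are measured in Shorten_branches and before the scratch
      //  BufferBlob below is sized; this reading is an assumption.)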
1100 
1101   // Initialize the space for the BufferBlob used to find and verify
1102   // instruction size in MachNode::emit_size()
1103   init_scratch_buffer_blob(const_req);
1104   if (failing())  return; // Out of memory
1105 
1106   // If this machine supports different size branch offsets, then pre-compute
1107   // the length of the blocks
1108   if( _matcher->is_short_branch_offset(-1, 0) ) {
1109     Shorten_branches(blk_labels, code_req, locs_req, stub_req);
1110     labels_not_set = false;
1111   }
1112 
1113   // nmethod and CodeBuffer count stubs & constants as part of method's code.
1114   int exception_handler_req = size_exception_handler();
1115   int deopt_handler_req = size_deopt_handler();
1116   exception_handler_req += MAX_stubs_size; // add marginal slop for handler
1117   deopt_handler_req += MAX_stubs_size; // add marginal slop for handler
1118   stub_req += MAX_stubs_size;   // ensure per-stub margin
1119   code_req += MAX_inst_size;    // ensure per-instruction margin
1120 
1121   if (StressCodeBuffers)
1122     code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10;  // force expansion
1123 
1124   int total_req =
1125     const_req +
1126     code_req +
1127     pad_req +
1128     stub_req +
1129     exception_handler_req +
1130     deopt_handler_req;               // deopt handler

1131 
1132   if (has_method_handle_invokes())
1133     total_req += deopt_handler_req;  // deopt MH handler
1134 
1135   CodeBuffer* cb = code_buffer();
1136   cb->initialize(total_req, locs_req);
1137 
1138   // Have we run out of code space?
1139   if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
1140     turn_off_compiler(this);
1141     return;
1142   }
1143   // Configure the code buffer.
1144   cb->initialize_consts_size(const_req);
1145   cb->initialize_stubs_size(stub_req);
1146   cb->initialize_oop_recorder(env()->oop_recorder());
1147 
1148   // fill in the nop array for bundling computations
1149   MachNode *_nop_list[Bundle::_nop_count];
1150   Bundle::initialize_nops(_nop_list, this);


1180 #endif
1181 
1182   NonSafepointEmitter non_safepoints(this);  // emit non-safepoints lazily
1183 
1184   // ------------------
1185   // Now fill in the code buffer
1186   Node *delay_slot = NULL;
1187 
1188   for( i=0; i < _cfg->_num_blocks; i++ ) {
1189     Block *b = _cfg->_blocks[i];
1190 
1191     Node *head = b->head();
1192 
 1193     // If this block needs to start aligned (i.e., can be reached other
1194     // than by falling-thru from the previous block), then force the
1195     // start of a new bundle.
1196     if( Pipeline::requires_bundling() && starts_bundle(head) )
1197       cb->flush_bundle(true);
1198 
1199     // Define the label at the beginning of the basic block
1200     if (labels_not_set) {
1201       MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
1202     } else {
1203       assert(blk_labels[b->_pre_order].loc_pos() == cb->insts_size(),
1204              err_msg("label position does not match code offset: %d != %d",
1205                      blk_labels[b->_pre_order].loc_pos(), cb->insts_size()));
1206     }
1207 
1208     uint last_inst = b->_nodes.size();
1209 
1210     // Emit block normally, except for last instruction.
1211     // Emit means "dump code bits into code buffer".
1212     for( uint j = 0; j<last_inst; j++ ) {
1213 
1214       // Get the node
1215       Node* n = b->_nodes[j];
1216 
1217       // See if delay slots are supported
1218       if (valid_bundle_info(n) &&
1219           node_bundling(n)->used_in_unconditional_delay()) {
1220         assert(delay_slot == NULL, "no use of delay slot node");
1221         assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size");
1222 
1223         delay_slot = n;
1224         continue;
1225       }
1226 


1703 }
1704 
1705 //------------------------------ScheduleAndBundle------------------------------
1706 // Perform instruction scheduling and bundling over the sequence of
1707 // instructions in backwards order.
1708 void Compile::ScheduleAndBundle() {
1709 
1710   // Don't optimize this if it isn't a method
1711   if (!_method)
1712     return;
1713 
1714   // Don't optimize this if scheduling is disabled
1715   if (!do_scheduling())
1716     return;
1717 
1718   NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
1719 
1720   // Create a data structure for all the scheduling information
1721   Scheduling scheduling(Thread::current()->resource_area(), *this);
1722 
1723   // Initialize the space for the BufferBlob used to find and verify
1724   // instruction size in MachNode::emit_size()
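      // (Presumably scheduling needs the scratch blob because it queries
      //  instruction sizes via MachNode::emit_size(), and MAX_const_size
      //  leaves room for any constants a trial emission of a MachConstantNode
      //  would add; this reading is an assumption.)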
1725   init_scratch_buffer_blob(MAX_const_size);
1726   if (failing())  return;  // Out of memory
1727 
1728   // Walk backwards over each basic block, computing the needed alignment
1729   // Walk over all the basic blocks
1730   scheduling.DoScheduling();
1731 
1732   // Clear the BufferBlob used for scheduling.
1733   clear_scratch_buffer_blob();
1734 }
1735 
1736 //------------------------------ComputeLocalLatenciesForward-------------------
1737 // Compute the latency of all the instructions.  This is fairly simple,
1738 // because we already have a legal ordering.  Walk over the instructions
1739 // from first to last, and compute the latency of the instruction based
1740 // on the latency of the preceding instruction(s).
1741 void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
1742 #ifndef PRODUCT
1743   if (_cfg->C->trace_opto_output())
1744     tty->print("# -> ComputeLocalLatenciesForward\n");
1745 #endif
1746 
1747   // Walk over all the schedulable instructions
1748   for( uint j=_bb_start; j < _bb_end; j++ ) {
1749 
1750     // This is a kludge, forcing all latency calculations to start at 1.
1751     // Used to allow latency 0 to force an instruction to the beginning
1752     // of the bb
1753     uint latency = 1;

