rev 1838: 6961690: load oops from constant table on SPARC
Summary: oops should be loaded from the constant table of an nmethod instead of materializing them with a long code sequence.
Reviewed-by:
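
Background (illustrative sketch only, not part of this changeset): on 64-bit SPARC an arbitrary oop immediate does not fit in a single instruction, so materializing it inline costs a multi-instruction sethi/or sequence, whereas a load from the nmethod's constant table is one instruction once a base register for the table has been established. Roughly:

    // Inline materialization of a 64-bit immediate (roughly the classic
    // set64 expansion; the exact sequence varies with the value):
    //   sethi %hh(oop), %g1         ! bits 63..42
    //   or    %g1, %hm(oop), %g1    ! bits 41..32
    //   sllx  %g1, 32, %g1
    //   sethi %lm(oop), %dst        ! bits 31..10
    //   or    %dst, %lo(oop), %dst  ! bits  9..0
    //   or    %dst, %g1, %dst
    //
    // Constant-table load (one instruction, given a table base register):
    //   ldx   [%table_base + const_offset], %dst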

          --- old/src/share/vm/opto/output.cpp
          +++ new/src/share/vm/opto/output.cpp
[... 37 lines elided ...]
  38   38  
  39   39  extern int emit_exception_handler(CodeBuffer &cbuf);
  40   40  extern int emit_deopt_handler(CodeBuffer &cbuf);
  41   41  
  42   42  //------------------------------Output-----------------------------------------
  43   43  // Convert Nodes to instruction bits and pass off to the VM
  44   44  void Compile::Output() {
  45   45    // RootNode goes
  46   46    assert( _cfg->_broot->_nodes.size() == 0, "" );
  47   47  
  48      -  // Initialize the space for the BufferBlob used to find and verify
  49      -  // instruction size in MachNode::emit_size()
  50      -  init_scratch_buffer_blob();
  51      -  if (failing())  return; // Out of memory
  52      -
  53   48    // The number of new nodes (mostly MachNop) is proportional to
  54   49    // the number of java calls and inner loops which are aligned.
  55   50    if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
  56   51                              C->inner_loops()*(OptoLoopAlignment-1)),
  57   52                             "out of nodes before code generation" ) ) {
  58   53      return;
  59   54    }
  60   55    // Make sure I can find the Start Node
  61   56    Block_Array& bbs = _cfg->_bbs;
  62   57    Block *entry = _cfg->_blocks[1];
[... 247 lines elided ...]
 310  305  
 311  306          b->set_first_inst_size(sum_size);
 312  307        } // f( b->head()->is_Loop() )
 313  308      } // for( i <= last_block )
 314  309    } // if( MaxLoopPad < OptoLoopAlignment-1 )
 315  310  }
 316  311  
 317  312  //----------------------Shorten_branches---------------------------------------
 318  313  // The architecture description provides short branch variants for some long
 319  314  // branch instructions. Replace eligible long branches with short branches.
 320      -void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size, int& const_size) {
      315 +void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size) {
 321  316  
 322  317    // fill in the nop array for bundling computations
 323  318    MachNode *_nop_list[Bundle::_nop_count];
 324  319    Bundle::initialize_nops(_nop_list, this);
 325  320  
 326  321    // ------------------
 327  322    // Compute size of each block, method size, and relocation information size
 328  323    uint *jmp_end    = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks);
 329  324    uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
 330  325    DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
 331  326    DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
 332  327    blk_starts[0]    = 0;
 333  328  
 334  329    // Initialize the sizes to 0
 335  330    code_size  = 0;          // Size in bytes of generated code
 336  331    stub_size  = 0;          // Size in bytes of all stub entries
 337  332    // Size in bytes of all relocation entries, including those in local stubs.
 338  333    // Start with 2-bytes of reloc info for the unvalidated entry point
 339  334    reloc_size = 1;          // Number of relocation entries
 340      -  const_size = 0;          // size of fp constants in words
 341  335  
 342  336    // Make three passes.  The first computes pessimistic blk_starts,
 343      -  // relative jmp_end, reloc_size and const_size information.
 344      -  // The second performs short branch substitution using the pessimistic
 345      -  // sizing. The third inserts nops where needed.
      337 +  // relative jmp_end and reloc_size information.  The second performs
      338 +  // short branch substitution using the pessimistic sizing.  The
      339 +  // third inserts nops where needed.
 346  340  
 347  341    Node *nj; // tmp
 348  342  
 349  343    // Step one, perform a pessimistic sizing pass.
 350  344    uint i;
 351  345    uint min_offset_from_last_call = 1;  // init to a positive value
 352  346    uint nop_size = (new (this) MachNopNode())->size(_regalloc);
 353  347    for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
 354  348      Block *b = _cfg->_blocks[i];
 355  349  
[... 2 lines elided ...]
 358  352      uint blk_size = 0;
 359  353      for( uint j = 0; j<last_inst; j++ ) {
 360  354        nj = b->_nodes[j];
 361  355        uint inst_size = nj->size(_regalloc);
 362  356        blk_size += inst_size;
 363  357        // Handle machine instruction nodes
 364  358        if( nj->is_Mach() ) {
 365  359          MachNode *mach = nj->as_Mach();
 366  360          blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
 367  361          reloc_size += mach->reloc();
 368      -        const_size += mach->const_size();
 369  362          if( mach->is_MachCall() ) {
 370  363            MachCallNode *mcall = mach->as_MachCall();
 371  364            // This destination address is NOT PC-relative
 372  365  
 373  366            mcall->method_set((intptr_t)mcall->entry_point());
 374  367  
 375  368            if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) {
 376  369              stub_size  += size_java_to_interp();
 377  370              reloc_size += reloc_java_to_interp();
 378  371            }
 379  372          } else if (mach->is_MachSafePoint()) {
 380  373            // If call/safepoint are adjacent, account for possible
 381  374            // nop to disambiguate the two safepoints.
 382  375            if (min_offset_from_last_call == 0) {
 383  376              blk_size += nop_size;
 384  377            }
 385      -        } else if (mach->ideal_Opcode() == Op_Jump) {
 386      -          const_size += b->_num_succs; // Address table size
 387      -          // The size is valid even for 64 bit since it is
 388      -          // multiplied by 2*jintSize on this method exit.
 389  378          }
 390  379        }
 391  380        min_offset_from_last_call += inst_size;
 392  381        // Remember end of call offset
 393  382        if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
 394  383          min_offset_from_last_call = 0;
 395  384        }
 396  385      }
 397  386  
 398  387      // During short branch replacement, we store the relative (to blk_starts)
[... 140 lines elided ...]
 539  528    code_size   = blk_starts[i-1] + jmp_end[i-1];
 540  529  
 541  530    // Relocation records
 542  531    reloc_size += 1;              // Relo entry for exception handler
 543  532  
 544  533    // Adjust reloc_size to number of record of relocation info
 545  534    // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
 546  535    // a relocation index.
 547  536    // The CodeBuffer will expand the locs array if this estimate is too low.
 548  537    reloc_size   *= 10 / sizeof(relocInfo);
 549      -
 550      -  // Adjust const_size to number of bytes
 551      -  const_size   *= 2*jintSize; // both float and double take two words per entry
 552      -
 553  538  }
 554  539  
 555  540  //------------------------------FillLocArray-----------------------------------
 556  541  // Create a bit of debug info and append it to the array.  The mapping is from
 557  542  // Java local or expression stack to constant, register or stack-slot.  For
 558  543  // doubles, insert 2 mappings and return 1 (to tell the caller that the next
 559  544  // entry has been taken care of and caller should skip it).
 560  545  static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
 561  546    // This should never have accepted Bad before
 562  547    assert(OptoReg::is_valid(regnum), "location must be valid");
[... 516 lines elided ...]
1079 1064  #endif
1080 1065    assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" );
1081 1066  
1082 1067    // Create an array of unused labels, one for each basic block
1083 1068    Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);
1084 1069  
1085 1070    for( i=0; i <= _cfg->_num_blocks; i++ ) {
1086 1071      blk_labels[i].init();
1087 1072    }
1088 1073  
     1074 +  if (has_mach_constant_base_node()) {
     1075 +    // Fill the constant table.
     1076 +    // Note:  This must happen before Shorten_branches.
     1077 +    for (i = 0; i < _cfg->_num_blocks; i++) {
     1078 +      Block* b = _cfg->_blocks[i];
     1079 +
     1080 +      for (uint j = 0; j < b->_nodes.size(); j++) {
     1081 +        Node* n = b->_nodes[j];
     1082 +
     1083 +        if (n->is_Mach()) {
     1084 +          MachNode *mach = n->as_Mach();
     1085 +
     1086 +          // If the MachNode is a MachConstantNode evaluate the
     1087 +          // constant value section.
     1088 +          if (mach->is_MachConstant()) {
     1089 +            MachConstantNode* machcon = mach->as_MachConstant();
     1090 +            machcon->eval_constant();
     1091 +          }
     1092 +        }
     1093 +      }
     1094 +    }
     1095 +
     1096 +    // Calculate the size of the constant table (including the padding
     1097 +    // to the next section).
     1098 +    const_req = mach_constant_base_node()->calculate_constant_table_size();
     1099 +  }
     1100 +
     1101 +  // Initialize the space for the BufferBlob used to find and verify
     1102 +  // instruction size in MachNode::emit_size()
     1103 +  init_scratch_buffer_blob(const_req);
     1104 +  if (failing())  return; // Out of memory
     1105 +
1089 1106    // If this machine supports different size branch offsets, then pre-compute
1090 1107    // the length of the blocks
1091 1108    if( _matcher->is_short_branch_offset(-1, 0) ) {
1092      -    Shorten_branches(blk_labels, code_req, locs_req, stub_req, const_req);
     1109 +    Shorten_branches(blk_labels, code_req, locs_req, stub_req);
1093 1110      labels_not_set = false;
1094 1111    }
1095 1112  
1096 1113    // nmethod and CodeBuffer count stubs & constants as part of method's code.
1097 1114    int exception_handler_req = size_exception_handler();
1098 1115    int deopt_handler_req = size_deopt_handler();
1099 1116    exception_handler_req += MAX_stubs_size; // add marginal slop for handler
1100 1117    deopt_handler_req += MAX_stubs_size; // add marginal slop for handler
1101 1118    stub_req += MAX_stubs_size;   // ensure per-stub margin
1102 1119    code_req += MAX_inst_size;    // ensure per-instruction margin
1103 1120  
1104 1121    if (StressCodeBuffers)
1105 1122      code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10;  // force expansion
1106 1123  
1107 1124    int total_req =
     1125 +    const_req +
1108 1126      code_req +
1109 1127      pad_req +
1110 1128      stub_req +
1111 1129      exception_handler_req +
1112      -    deopt_handler_req +              // deopt handler
1113      -    const_req;
     1130 +    deopt_handler_req;               // deopt handler
1114 1131  
1115 1132    if (has_method_handle_invokes())
1116 1133      total_req += deopt_handler_req;  // deopt MH handler
1117 1134  
1118 1135    CodeBuffer* cb = code_buffer();
1119 1136    cb->initialize(total_req, locs_req);
1120 1137  
1121 1138    // Have we run out of code space?
1122 1139    if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
1123 1140      turn_off_compiler(this);
[... 49 lines elided ...]
1173 1190  
1174 1191      Node *head = b->head();
1175 1192  
1176 1193      // If this block needs to start aligned (i.e, can be reached other
1177 1194      // than by falling-thru from the previous block), then force the
1178 1195      // start of a new bundle.
1179 1196      if( Pipeline::requires_bundling() && starts_bundle(head) )
1180 1197        cb->flush_bundle(true);
1181 1198  
1182 1199      // Define the label at the beginning of the basic block
1183      -    if( labels_not_set )
1184      -      MacroAssembler(cb).bind( blk_labels[b->_pre_order] );
1185      -
1186      -    else
1187      -      assert( blk_labels[b->_pre_order].loc_pos() == cb->insts_size(),
1188      -              "label position does not match code offset" );
     1200 +    if (labels_not_set) {
     1201 +      MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
     1202 +    } else {
     1203 +      assert(blk_labels[b->_pre_order].loc_pos() == cb->insts_size(),
     1204 +             err_msg("label position does not match code offset: %d != %d",
     1205 +                     blk_labels[b->_pre_order].loc_pos(), cb->insts_size()));
     1206 +    }
1189 1207  
1190 1208      uint last_inst = b->_nodes.size();
1191 1209  
1192 1210      // Emit block normally, except for last instruction.
1193 1211      // Emit means "dump code bits into code buffer".
1194 1212      for( uint j = 0; j<last_inst; j++ ) {
1195 1213  
1196 1214        // Get the node
1197 1215        Node* n = b->_nodes[j];
1198 1216  
[... 496 lines elided ...]
1695 1713  
1696 1714    // Don't optimize this if scheduling is disabled
1697 1715    if (!do_scheduling())
1698 1716      return;
1699 1717  
1700 1718    NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
1701 1719  
1702 1720    // Create a data structure for all the scheduling information
1703 1721    Scheduling scheduling(Thread::current()->resource_area(), *this);
1704 1722  
     1723 +  // Initialize the space for the BufferBlob used to find and verify
     1724 +  // instruction size in MachNode::emit_size()
     1725 +  init_scratch_buffer_blob(MAX_const_size);
     1726 +  if (failing())  return;  // Out of memory
     1727 +
1705 1728    // Walk backwards over each basic block, computing the needed alignment
1706 1729    // Walk over all the basic blocks
1707 1730    scheduling.DoScheduling();
     1731 +
     1732 +  // Clear the BufferBlob used for scheduling.
     1733 +  clear_scratch_buffer_blob();
1708 1734  }
1709 1735  
1710 1736  //------------------------------ComputeLocalLatenciesForward-------------------
1711 1737  // Compute the latency of all the instructions.  This is fairly simple,
1712 1738  // because we already have a legal ordering.  Walk over the instructions
1713 1739  // from first to last, and compute the latency of the instruction based
1714 1740  // on the latency of the preceding instruction(s).
1715 1741  void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
1716 1742  #ifndef PRODUCT
1717 1743    if (_cfg->C->trace_opto_output())
[... 1088 lines elided ...]
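
For orientation, a condensed restatement of the new sizing order in Compile::Output established by the hunks above (a sketch, not a verbatim excerpt):

    // 1. Walk all blocks; for each MachConstantNode call eval_constant()
    //    so its value is entered into the nmethod's constant table.
    // 2. const_req = calculate_constant_table_size();  // incl. padding to
    //                                                  // the next section
    // 3. init_scratch_buffer_blob(const_req);  // moved here from the top of
    //                                          // Output(), now table-aware
    // 4. Shorten_branches(blk_labels, code_req, locs_req, stub_req);
    //    // const_size parameter dropped: the table is already fully sized
    // 5. total_req = const_req + code_req + pad_req + stub_req
    //              + exception_handler_req + deopt_handler_req;
    //    cb->initialize(total_req, locs_req);  // constant section counted first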